Example #1
0
    def _get_joblist_command(self, jobs=None, user=None):
        """
        The command to report full information on existing jobs.

        :param jobs: a job id (string) or an iterable of job ids to restrict the query to.
        :param user: a username to restrict the query to; mutually exclusive with ``jobs``.
        :return: the full ``qstat`` command line as a single string.
        :raises FeatureNotAvailable: if both ``jobs`` and ``user`` are given
            (PBS cannot filter on both at the same time).

        TODO: in the case of job arrays, decide what to do (i.e., if we want
              to pass the -t options to list each subjob).
        """
        from aiida.common.exceptions import FeatureNotAvailable

        # Query all configured PBS servers at once.
        command = [
            'qstat', '-f', '-w',
            '@arien-pro.ics.muni.cz @wagap-pro.cerit-sc.cz @pbs.elixir-czech.cz '
        ]

        if jobs and user:
            raise FeatureNotAvailable('Cannot query by user and job(s) in PBS')

        # BUGFIX: previously `user` was unconditionally overwritten with a literal
        # placeholder string here, which forced a bogus `-u` flag onto every query
        # and defeated the mutual-exclusion check above. Use the caller's value.
        if user:
            command.append('-u{}'.format(user))

        if jobs:
            if isinstance(jobs, six.string_types):
                command.append('{}'.format(escape_for_bash(jobs)))
            else:
                try:
                    command.append('{}'.format(' '.join(
                        escape_for_bash(j) for j in jobs)))
                except TypeError:
                    raise TypeError(
                        "If provided, the 'jobs' variable must be a string or an iterable of strings"
                    )

        comm = ' '.join(command)
        _LOGGER.debug('qstat command: {}'.format(comm))
        return comm
Example #2
0
    def gotocomputer_command(self, remotedir):
        """
        Specific gotocomputer string to connect to a given remote computer via
        ssh and directly go to the calculation folder.

        :param remotedir: full path of the remote directory to land in.
        :return: a shell command string that opens an interactive ssh session
            (forcing a pseudo-terminal with ``-t``) and ``cd``s into
            ``remotedir``; if the directory no longer exists, it prints a
            warning on the remote side instead.
        """

        # TODO: add also ProxyCommand and Timeout support

        # Translate the stored connection arguments into extra ssh flags.
        further_params = []
        if 'username' in self._connect_args:
            further_params.append('-l {}'.format(escape_for_bash(self._connect_args['username'])))

        if 'port' in self._connect_args:
            # NOTE(review): port is not passed through escape_for_bash like the
            # other values -- presumably it is always an integer; confirm.
            further_params.append('-p {}'.format(self._connect_args['port']))

        if 'key_filename' in self._connect_args:
            further_params.append('-i {}'.format(escape_for_bash(self._connect_args['key_filename'])))

        further_params_str = ' '.join(further_params)
        # NOTE(review): remotedir is only wrapped in single quotes, not escaped
        # via escape_for_bash, so a path containing a single quote would break
        # this command -- confirm callers never pass such paths.
        connect_string = """ssh -t {machine} {further_params} "if [ -d {escaped_remotedir} ] ; then cd {escaped_remotedir} ; bash -l ; else echo '  ** The directory' ; echo '  ** {remotedir}' ; echo '  ** seems to have been deleted, I logout...' ; fi" """.format(
            further_params=further_params_str,
            machine=self._machine,
            escaped_remotedir="'{}'".format(remotedir),
            remotedir=remotedir)

        # print connect_string
        return connect_string
Example #3
0
    def _get_joblist_command(self, jobs=None, user=None):
        """
        The command to report full information on existing jobs.

        TODO: in the case of job arrays, decide what to do (i.e., if we want
              to pass the -t options to list each subjob).
        """
        # Using subprocess.Popen with `start_new_session=True` (as done in both local and ssh transport) results in
        # processes without a controlling terminal.
        # The -x option tells ps to include processes which do not have a controlling terminal, which would not be
        # listed otherwise (leading the direct scheduler to conclude that the process already completed).
        pieces = ['ps -xo pid,stat,user,time']

        if jobs:
            if isinstance(jobs, str):
                pieces.append(escape_for_bash(jobs))
            else:
                try:
                    pieces.append(' '.join(escape_for_bash(job_id) for job_id in jobs if job_id))
                except TypeError:
                    raise TypeError("If provided, the 'jobs' variable must be a string or a list of strings")

        # Drop the header row; 'tail -n +2' is used because ps has no portable
        # no-header flag ('h' is not an option).
        return ' '.join(pieces) + '| tail -n +2'
Example #4
0
    def _get_run_line(self, codes_info, codes_run_mode):
        """Return a string with the line to execute a specific code with specific arguments.

        :parameter codes_info: a list of `aiida.common.datastructures.CodeInfo` objects. Each contains the information
            needed to run the code. I.e. `cmdline_params`, `stdin_name`, `stdout_name`, `stderr_name`, `join_files`. See
            the documentation of `JobTemplate` and `CodeInfo`.
        :parameter codes_run_mode: instance of `aiida.common.datastructures.CodeRunMode` contains the information on how
            to launch the multiple codes.
        :return: string with format: [executable] [args] {[ < stdin ]} {[ < stdout ]} {[2>&1 | 2> stderr]}
        """
        from aiida.common.datastructures import CodeRunMode

        runlines = []

        for info in codes_info:
            # Executable plus command-line arguments, each escaped individually.
            executable = ' '.join(escape_for_bash(param) for param in info.cmdline_params)

            stdin_part = '< {}'.format(escape_for_bash(info.stdin_name)) if info.stdin_name else ''
            stdout_part = '> {}'.format(escape_for_bash(info.stdout_name)) if info.stdout_name else ''

            if info.join_files:
                # Merge stderr into stdout.
                stderr_part = '2>&1'
            elif info.stderr_name:
                stderr_part = '2> {}'.format(escape_for_bash(info.stderr_name))
            else:
                stderr_part = ''

            runlines.append('{} {} {} {}'.format(executable, stdin_part, stdout_part, stderr_part))

        self.logger.debug('_get_run_line output: {}'.format(runlines))

        if codes_run_mode == CodeRunMode.PARALLEL:
            # Background each code with '&' and wait for all of them at the end.
            runlines.append('wait\n')
            return ' &\n\n'.join(runlines)

        if codes_run_mode == CodeRunMode.SERIAL:
            return '\n\n'.join(runlines)

        raise NotImplementedError('Unrecognized code run mode')
Example #5
0
    def rmtree(self, path):
        """
        Remove a file or a directory at path, recursively
        Flags used: -r: recursive copy; -f: force, makes the command non interactive;

        :param path: remote path to delete

        :raise IOError: if the rm execution failed.
        """
        # Assuming linux rm command!

        # TODO : do we need to avoid the aliases when calling rm_exe='rm'? Call directly /bin/rm?

        # Guard clause: refuse empty / falsy input.
        if not path:
            raise ValueError('Input to rmtree() must be a non empty string. Found instead %s as path' % path)

        command = 'rm -r -f {}'.format(escape_for_bash(path))

        retval, stdout, stderr = self.exec_command_wait(command)

        if retval != 0:
            self.logger.error("Problem executing rm. Exit code: {}, stdout: '{}', "
                              "stderr: '{}'".format(retval, stdout, stderr))
            raise IOError('Error while executing rm. Exit code: {}'.format(retval))

        if stderr.strip():
            self.logger.warning('There was nonempty stderr in the rm command: {}'.format(stderr))
        return True
Example #6
0
    def _exec_cp(self, cp_exe, cp_flags, src, dst):
        # Helper for the copy methods: build and run a remote `cp` invocation
        # with both paths escaped for the shell.
        command = '{} {} {} {}'.format(cp_exe, cp_flags, escape_for_bash(src), escape_for_bash(dst))

        retval, stdout, stderr = self.exec_command_wait(command)

        # TODO : check and fix below

        if retval != 0:
            self.logger.error("Problem executing cp. Exit code: {}, stdout: '{}', "
                              "stderr: '{}', command: '{}'".format(retval, stdout, stderr, command))
            raise IOError('Error while executing cp. Exit code: {}, '
                          "stdout: '{}', stderr: '{}', "
                          "command: '{}'".format(retval, stdout, stderr, command))

        if stderr.strip():
            self.logger.warning('There was nonempty stderr in the cp command: {}'.format(stderr))
Example #7
0
    def _get_detailed_job_info_command(self, job_id):
        """
        Return the command to run to get the detailed information on a job,
        even after the job has finished.

        The output text is just retrieved, and returned for logging purposes.
        """
        # `bjobs -l` gives the long-format report; the job id is shell-escaped.
        return f'bjobs -l {escape_for_bash(job_id)}'
    def submit_from_script(self, working_directory, submit_script):
        """Submit the submission script to the scheduler.

        :return: return a string with the job ID in a valid format to be used for querying.
        """
        self.transport.chdir(working_directory)
        submit_command = self._get_submit_command(escape_for_bash(submit_script))
        retval, stdout, stderr = self.transport.exec_command_wait(submit_command)
        return self._parse_submit_output(retval, stdout, stderr)
Example #9
0
    def _exec_command_internal(self, command, combine_stderr=False, bufsize=-1):
        """
        Executes the specified command in bash login shell.

        Before the command is executed, changes directory to the current
        working directory as returned by self.getcwd().

        For executing commands and waiting for them to finish, use
        exec_command_wait.

        :param  command: the command to execute. The command is assumed to be
            already escaped using :py:func:`aiida.common.escaping.escape_for_bash`.
        :param combine_stderr: (default False) if True, combine stdout and
                stderr on the same buffer (i.e., stdout).
                Note: If combine_stderr is True, stderr will always be empty.
        :param bufsize: same meaning of the one used by paramiko.
        :return: a tuple with (stdin, stdout, stderr, channel),
            where stdin, stdout and stderr behave as file-like objects,
            plus the methods provided by paramiko, and channel is a
            paramiko.Channel object.
        """
        # Open a fresh session channel on the already-established SSH transport.
        channel = self.sshclient.get_transport().open_session()
        channel.set_combine_stderr(combine_stderr)

        # Prefix with a `cd` so the command runs in the tracked working
        # directory; `&&` makes the command run only if the cd succeeded.
        if self.getcwd() is not None:
            escaped_folder = escape_for_bash(self.getcwd())
            command_to_execute = ('cd {escaped_folder} && '
                                  '{real_command}'.format(escaped_folder=escaped_folder, real_command=command))
        else:
            command_to_execute = command

        self.logger.debug('Command to be executed: {}'.format(command_to_execute))

        # Note: The default shell will eat one level of escaping, while
        # 'bash -l -c ...' will eat another. Thus, we need to escape again.
        channel.exec_command('bash -l -c ' + escape_for_bash(command_to_execute))

        # Wrap the channel in file-like objects for the three standard streams.
        stdin = channel.makefile('wb', bufsize)
        stdout = channel.makefile('rb', bufsize)
        stderr = channel.makefile_stderr('rb', bufsize)

        return stdin, stdout, stderr, channel
Example #10
0
def computer_config_show(computer, user, defaults, as_option_string):
    """Show the current configuration for a computer.

    :param computer: the computer whose transport configuration to display.
    :param user: the user whose stored configuration is read (ignored when
        ``defaults`` is set).
    :param defaults: if True, show the transport's default values instead of
        the stored per-user configuration.
    :param as_option_string: if True, print a single bash-escaped string of
        ``--option=value`` flags instead of a table.
    """
    import tabulate
    from aiida.common.escaping import escape_for_bash

    # Collect the click options of the `configure` command for this transport
    # type, restricted to the transport's valid authentication parameters.
    transport_cls = computer.get_transport_class()
    option_list = [
        param for param in transport_cli.create_configure_cmd(
            computer.get_transport_type()).params
        if isinstance(param, click.core.Option)
    ]
    option_list = [
        option for option in option_list
        if option.name in transport_cls.get_valid_auth_params()
    ]

    if defaults:
        config = {
            option.name:
            transport_cli.transport_option_default(option.name, computer)
            for option in option_list
        }
    else:
        config = computer.get_configuration(user)

    option_items = []
    if as_option_string:
        for option in option_list:
            t_opt = transport_cls.auth_options[option.name]
            # Include the option when it has a value; `is False` keeps
            # explicitly-disabled switches in the output.
            if config.get(option.name) or config.get(option.name) is False:
                if t_opt.get('switch'):
                    # On/off switch: emit either the flag or its --no- variant.
                    option_value = option.opts[-1] if config.get(
                        option.name) else '--no-{}'.format(
                            option.name.replace('_', '-'))
                elif t_opt.get('is_flag'):
                    # Plain flag: emit it only when the stored value matches
                    # the default (NOTE(review): this condition looks inverted
                    # -- confirm whether the flag should be emitted when the
                    # value *differs* from the default).
                    is_default = config.get(
                        option.name) == transport_cli.transport_option_default(
                            option.name, computer)
                    option_value = option.opts[-1] if is_default else ''
                else:
                    option_value = '{}={}'.format(
                        option.opts[-1], option.type(config[option.name]))
                option_items.append(option_value)
        opt_string = ' '.join(option_items)
        echo.echo(escape_for_bash(opt_string))
    else:
        # Tabular output: one row per valid auth parameter, '-' when unset.
        table = []
        for name in transport_cls.get_valid_auth_params():
            if name in config:
                table.append(('* ' + name, config[name]))
            else:
                table.append(('* ' + name, '-'))
        echo.echo(tabulate.tabulate(table, tablefmt='plain'))
Example #11
0
    def submit_from_script(self, working_directory, submit_script):
        """
        Goes in the working directory and submits the submit_script.

        Return a string with the JobID in a valid format to be used for
        querying.

        Typically, this function does not need to be modified by the plugins.
        """
        self.transport.chdir(working_directory)
        result = self.transport.exec_command_wait(self._get_submit_command(escape_for_bash(submit_script)))
        # result is the (retval, stdout, stderr) tuple returned by the transport.
        return self._parse_submit_output(*result)
Example #12
0
def test_escape_for_bash():
    """Tests various inputs for `aiida.common.escaping.escape_for_bash`."""
    # None maps to the empty string.
    assert escape_for_bash(None) == ''
    # Plain strings and strings with spaces are single-quoted.
    assert escape_for_bash('string') == "'string'"
    assert escape_for_bash('string with space') == "'string with space'"
    # Embedded single quotes use the '"'"' close-quote/reopen idiom.
    assert escape_for_bash("string with a ' single quote") == """'string with a '"'"' single quote'"""
    # Non-string inputs are stringified before quoting.
    assert escape_for_bash(1) == "'1'"
    assert escape_for_bash(2.0) == "'2.0'"
Example #13
0
    def _exec_command_internal(self, command, **kwargs):  # pylint: disable=unused-argument
        """
        Executes the specified command in bash login shell.

        Before the command is executed, changes directory to the current
        working directory as returned by self.getcwd().

        For executing commands and waiting for them to finish, use
        exec_command_wait.
        Otherwise, to end the process, use the proc.wait() method.

        The subprocess is set to have a different process group than the
        main process, so that it is shielded from signals sent to the parent.

        :param  command: the command to execute. The command is assumed to be
            already escaped using :py:func:`aiida.common.escaping.escape_for_bash`.

        :return: a tuple with (stdin, stdout, stderr, proc),
            where stdin, stdout and stderr behave as file-like objects,
            proc is the process object as returned by the
            subprocess.Popen() class.
        """
        # pylint: disable=subprocess-popen-preexec-fn
        from aiida.common.escaping import escape_for_bash

        # Note: The outer shell will eat one level of escaping, while
        # 'bash -l -c ...' will eat another. Thus, we need to escape again.
        full_command = f'{self._bash_command_str}-c {escape_for_bash(command)}'

        proc = subprocess.Popen(
            full_command,
            shell=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=self.getcwd(),
            start_new_session=True,
        )

        return proc.stdin, proc.stdout, proc.stderr, proc
Example #14
0
    def putfile(self,
                localpath,
                remotepath,
                callback=None,
                dereference=True,
                overwrite=True):
        """
        Put a file from local to remote by streaming its content in chunks
        through ``echo -n ... | cat``.

        :param localpath: an (absolute) local path
        :param remotepath: a remote path
        :param callback: optional callable invoked after each chunk as
            ``callback(bytes_on_remote_so_far, total_file_size)``
        :param dereference: only True is supported (symlinks are followed)
        :param overwrite: if True overwrites files and folders (boolean).
            Default = True.

        :raise ValueError: if local path is invalid
        :raise OSError: if the localpath does not exist,
                    or unintentionally overwriting
        :raise IOError: if a remote command exits with non-zero status
        """
        if not dereference:
            raise NotImplementedError

        if not os.path.isabs(localpath):
            raise ValueError('The localpath must be an absolute path')

        if self.isfile(remotepath) and not overwrite:
            raise OSError('Destination already exists: not overwriting it')

        file_size = os.stat(localpath).st_size
        # There is a maximum limit in the linux kernel for the size of a command
        # argument (usually 131071) and the call fails beyond it.
        # Split in several parts when getting there.
        if (file_size > 50000):
            self.logger.debug(
                'Trying to send a large file, will have to cut it : size {}'.
                format(file_size))
        firstchunk = True
        with open(localpath, 'rb') as fl:
            buffer = fl.read(50000)
            while buffer:
                exe = 'echo -n'
                # The first chunk truncates the destination ('>'); the
                # following chunks append to it ('>>').
                exe_post_init = '| cat >'
                exe_post = '| cat >>'

                # BUGFIX: escape the destination path as well -- previously it
                # was interpolated raw, so a remote path containing spaces or
                # shell metacharacters would break (or subvert) the command.
                command = '{} {} {} {}'.format(
                    exe, escape_for_bash(paramiko.py3compat.u(buffer)),
                    exe_post_init if firstchunk else exe_post,
                    escape_for_bash(paramiko.py3compat.u(remotepath)))

                retval, stdout, stderr = self.exec_command_wait(command)
                if retval == 0:
                    if stderr.strip():
                        self.logger.warning(
                            'There was nonempty stderr in the put command: {}'.
                            format(stderr))
                    if callback is not None:
                        size = self.stat(remotepath).st_size
                        callback(size, file_size)
                else:
                    self.logger.error(
                        "Problem executing put. Exit code: {}, stdout: '{}', "
                        "stderr: '{}'".format(retval, stdout, stderr))
                    raise IOError(
                        'Error while executing put. Exit code: {}'.format(
                            retval))
                firstchunk = False
                buffer = fl.read(50000)
Example #15
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.

        TODO: truncate the title if too long
        """
        import string

        empty_line = ''

        lines = []
        if job_tmpl.submit_as_hold:
            lines.append('#SBATCH -H')

        if job_tmpl.rerunnable:
            lines.append('#SBATCH --requeue')
        else:
            lines.append('#SBATCH --no-requeue')

        if job_tmpl.email:
            # If not specified, but email events are set, SLURM
            # sends the mail to the job owner by default
            lines.append('#SBATCH --mail-user={}'.format(job_tmpl.email))

        if job_tmpl.email_on_started:
            lines.append('#SBATCH --mail-type=BEGIN')
        if job_tmpl.email_on_terminated:
            lines.append('#SBATCH --mail-type=FAIL')
            lines.append('#SBATCH --mail-type=END')

        if job_tmpl.job_name:
            # The man page does not specify any specific limitation
            # on the job name.
            # Just to be sure, I remove unwanted characters, and I
            # trim it to length 128

            # I leave only letters, numbers, dots, dashes and underscores
            # Note: I don't compile the regexp, I am going to use it only once
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid charachter
            if not job_title or (job_title[0] not in string.ascii_letters + string.digits):
                job_title = 'j' + job_title

            # Truncate to the first 128 characters
            # Nothing is done if the string is shorter.
            job_title = job_title[:128]

            lines.append('#SBATCH --job-name="{}"'.format(job_title))

        if job_tmpl.import_sys_environment:
            lines.append('#SBATCH --get-user-env')

        if job_tmpl.sched_output_path:
            lines.append('#SBATCH --output={}'.format(job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # TODO: manual says:
            # By  default both standard output and standard error are directed
            # to a file of the name "slurm-%j.out", where the "%j" is replaced
            # with  the  job  allocation  number.
            # See that this automatic redirection works also if
            # I specify a different --output file
            if job_tmpl.sched_error_path:
                self.logger.info('sched_join_files is True, but sched_error_path is set in '
                                 'SLURM script; ignoring sched_error_path')
        else:
            if job_tmpl.sched_error_path:
                lines.append('#SBATCH --error={}'.format(job_tmpl.sched_error_path))
            else:
                # To avoid automatic join of files
                lines.append('#SBATCH --error=slurm-%j.err')

        if job_tmpl.queue_name:
            lines.append('#SBATCH --partition={}'.format(job_tmpl.queue_name))

        if job_tmpl.account:
            lines.append('#SBATCH --account={}'.format(job_tmpl.account))

        if job_tmpl.qos:
            lines.append('#SBATCH --qos={}'.format(job_tmpl.qos))

        if job_tmpl.priority:
            #  Run the job with an adjusted scheduling priority  within  SLURM.
            #  With no adjustment value the scheduling priority is decreased by
            #  100. The adjustment range is from -10000 (highest  priority)  to
            #  10000  (lowest  priority).
            lines.append('#SBATCH --nice={}'.format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError('Job resources (as the num_machines) are required for the SLURM scheduler plugin')

        lines.append('#SBATCH --nodes={}'.format(job_tmpl.job_resource.num_machines))
        if job_tmpl.job_resource.num_mpiprocs_per_machine:
            lines.append('#SBATCH --ntasks-per-node={}'.format(job_tmpl.job_resource.num_mpiprocs_per_machine))

        if job_tmpl.job_resource.num_cores_per_mpiproc:
            lines.append('#SBATCH --cpus-per-task={}'.format(job_tmpl.job_resource.num_cores_per_mpiproc))

        if job_tmpl.max_wallclock_seconds is not None:
            try:
                tot_secs = int(job_tmpl.max_wallclock_seconds)
                if tot_secs <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError('max_wallclock_seconds must be '
                                 "a positive integer (in seconds)! It is instead '{}'"
                                 ''.format((job_tmpl.max_wallclock_seconds)))
            days = tot_secs // 86400
            tot_hours = tot_secs % 86400
            hours = tot_hours // 3600
            tot_minutes = tot_hours % 3600
            minutes = tot_minutes // 60
            seconds = tot_minutes % 60
            if days == 0:
                lines.append('#SBATCH --time={:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds))
            else:
                lines.append('#SBATCH --time={:d}-{:02d}:{:02d}:{:02d}'.format(days, hours, minutes, seconds))

        # It is the memory per node, not per cpu!
        if job_tmpl.max_memory_kb:
            try:
                virtual_memory_kb = int(job_tmpl.max_memory_kb)
                if virtual_memory_kb <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError('max_memory_kb must be '
                                 "a positive integer (in kB)! It is instead '{}'"
                                 ''.format((job_tmpl.MaxMemoryKb)))
            # --mem: Specify the real memory required per node in MegaBytes.
            # --mem and  --mem-per-cpu  are  mutually exclusive.
            lines.append('#SBATCH --mem={}'.format(virtual_memory_kb // 1024))

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append('# ENVIRONMENT VARIABLES BEGIN ###')
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError('If you provide job_environment, it must be a dictionary')
            for key, value in job_tmpl.job_environment.items():
                lines.append('export {}={}'.format(key.strip(), escape_for_bash(value)))
            lines.append('# ENVIRONMENT VARIABLES  END  ###')
            lines.append(empty_line)

        lines.append(empty_line)

        return '\n'.join(lines)
Example #16
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl. See the following manual
        https://www-01.ibm.com/support/knowledgecenter/SSETD4_9.1.2/lsf_command_ref/bsub.1.dita?lang=en
        for more details about the possible options to bsub, in particular for
        the parallel environment definition (with the -m option).

        :param job_tmpl: an JobTemplate instance with relevant parameters set.
        """
        # pylint: disable=too-many-statements,too-many-branches
        import string
        import re

        empty_line = ''

        lines = []
        if job_tmpl.submit_as_hold:
            lines.append('#BSUB -H')

        if job_tmpl.rerunnable:
            lines.append('#BSUB -r')
        else:
            lines.append('#BSUB -rn')

        if job_tmpl.email:
            # If not specified, but email events are set, SLURM
            # sends the mail to the job owner by default
            lines.append('#BSUB -u {}'.format(job_tmpl.email))

        if job_tmpl.email_on_started:
            lines.append('#BSUB -B')
        if job_tmpl.email_on_terminated:
            lines.append('#BSUB -N')

        if job_tmpl.job_name:
            # The man page specifies only a limitation
            # on the job name to 4094 characters.
            # To be safe, I remove unwanted characters, and I
            # trim it to length 128.

            # I leave only letters, numbers, dots, dashes and underscores
            # Note: I don't compile the regexp, I am going to use it only once
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid character
            if not job_title or (job_title[0]
                                 not in string.ascii_letters + string.digits):
                job_title = 'j' + job_title

            # Truncate to the first 128 characters
            # Nothing is done if the string is shorter.
            job_title = job_title[:128]
            lines.append('#BSUB -J "{}"'.format(job_title))

        if not job_tmpl.import_sys_environment:
            self.logger.warning(
                'LSF scheduler cannot ignore the user environment')

        if job_tmpl.sched_output_path:
            lines.append('#BSUB -o {}'.format(job_tmpl.sched_output_path))

        sched_error_path = getattr(job_tmpl, 'sched_error_path', None)
        if job_tmpl.sched_join_files:
            sched_error_path = '{}_'.format(job_tmpl.sched_output_path)
            self.logger.warning('LSF scheduler does not support joining '
                                'the standard output and standard error '
                                'files; std error file assigned instead '
                                'to the file {}'.format(sched_error_path))

        if sched_error_path:
            lines.append('#BSUB -e {}'.format(job_tmpl.sched_error_path))

        if job_tmpl.queue_name:
            lines.append('#BSUB -q {}'.format(job_tmpl.queue_name))

        if job_tmpl.priority:
            # Specifies user-assigned job priority that orders all jobs
            # (from all users) in a queue. Valid values for priority
            # are any integers between 1 and MAX_USER_PRIORITY
            # (configured in lsb.params, displayed by "bparams -l").
            # Jobs are scheduled based first on their queue priority first, then
            # job priority, and lastly in first-come first-served order.
            lines.append('#BSUB -sp {}'.format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError(
                'Job resources (as the tot_num_mpiprocs) are required for the LSF scheduler plugin'
            )

        lines.append('#BSUB -n {}'.format(
            job_tmpl.job_resource.get_tot_num_mpiprocs()))
        # Note:  make sure that PARALLEL_SCHED_BY_SLOT=Y is NOT
        # defined in lsb.params (you can check with the output of bparams -l).
        # Note: the -n option of bsub can also contain a maximum number of
        # procs to be used
        if job_tmpl.job_resource.parallel_env:
            lines.append('#BSUB -m "{}"'.format(
                job_tmpl.job_resource.parallel_env))

        if job_tmpl.max_wallclock_seconds is not None:
            # ABS_RUNLIMIT=Y should be set, in lsb.params (check with bparams -l)
            try:
                tot_secs = int(job_tmpl.max_wallclock_seconds)
                if tot_secs <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    'max_wallclock_seconds must be '
                    "a positive integer (in seconds)! It is instead '{}'"
                    ''.format((job_tmpl.max_wallclock_seconds)))
            hours = tot_secs // 3600
            # The double negation results in the ceiling rather than the floor
            # of the division
            minutes = -(-(tot_secs % 3600) // 60)
            lines.append('#BSUB -W {:02d}:{:02d}'.format(hours, minutes))

        # TODO: check if this is the memory per node  # pylint: disable=fixme
        if job_tmpl.max_memory_kb:
            try:
                virtual_memory_kb = int(job_tmpl.max_memory_kb)
                if virtual_memory_kb <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    'max_memory_kb must be '
                    "a positive integer (in kB)! It is instead '{}'"
                    ''.format((job_tmpl.MaxMemoryKb)))
            # The -M option sets a per-process (soft) memory limit for all the
            # processes that belong to this job
            lines.append('#BSUB -M {}'.format(virtual_memory_kb))

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # hand.
        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append('# ENVIRONMENT VARIABLES BEGIN ###')
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError(
                    'If you provide job_environment, it must be a dictionary')
            for key, value in job_tmpl.job_environment.items():
                lines.append('export {}={}'.format(key.strip(),
                                                   escape_for_bash(value)))
            lines.append('# ENVIRONMENT VARIABLES END  ###')
            lines.append(empty_line)

        lines.append(empty_line)

        # The following seems to be the only way to copy the input files
        # to the node where the computation are actually launched (the
        # -f option of bsub that does not always work...)
        # TODO: implement the case when LSB_OUTDIR is not properly defined...  # pylint: disable=fixme
        # (need to add the line "#BSUB -outdir PATH_TO_REMOTE_DIRECTORY")
        # IMPORTANT! the -z is needed, because if LSB_OUTDIR is not defined,
        # you would do 'cp -R /* .' basically copying ALL FILES in your
        # computer (including mounted partitions) in the current dir!!
        lines.append("""
if [ ! -z "$LSB_OUTDIR" ]
then
  cp -R "$LSB_OUTDIR"/* .
fi
""")

        return '\n'.join(lines)
Example #17
0
    def _get_submit_script_header(self, job_tmpl):
        """

        Parameters in job_resource are defined at NodeNumberJobResource in
        aiida-core as follows:

            _default_fields = (
                'num_machines',
                'num_mpiprocs_per_machine',
                'num_cores_per_machine',
                'num_cores_per_mpiproc',
            )

        With these parameters, the header part is given by

        #QSUB2 core {num_machines * num_cores_per_machine}
        #QSUB2 mpi {num_machines * num_mpiprocs_per_machine}
        #QSUB2 smp {num_cores_per_mpiproc}

        When 'num_cores_per_machine' is unspecified, 'num_mpiprocs_per_machine'
        is used instead of 'num_cores_per_machine'.

        Be sure 'tot_num_mpiprocs' can be used to specify
        'num_mpiprocs_per_machine' indirectly as
            num_mpiprocs_per_machine = tot_num_mpiprocs // num_machines

        """

        import re
        import string

        empty_line = ''

        lines = []
        if job_tmpl.queue_name:
            lines.append('#QSUB2 queue {}'.format(job_tmpl.queue_name))

        resource_lines = self._get_resource_lines(
            num_machines=job_tmpl.job_resource.num_machines,
            num_mpiprocs_per_machine=job_tmpl.job_resource.
            num_mpiprocs_per_machine,
            num_cores_per_machine=job_tmpl.job_resource.num_cores_per_machine,
            num_cores_per_mpiproc=job_tmpl.job_resource.num_cores_per_mpiproc,
            max_wallclock_seconds=job_tmpl.max_wallclock_seconds)
        lines += resource_lines

        if job_tmpl.job_name:
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)
            if not job_title or (job_title[0]
                                 not in string.ascii_letters + string.digits):
                job_title = 'j' + job_title
            job_title = job_title[:15]
            lines.append('#PBS -N {}'.format(job_title))

        if job_tmpl.sched_join_files:
            lines.append('#PBS -j oe')
            if job_tmpl.sched_error_path:
                _LOGGER.info(
                    'sched_join_files is True, but sched_error_path is set in '
                    'PBSPro script; ignoring sched_error_path')

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append('# ENVIRONMENT VARIABLES BEGIN ###')
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError(
                    'If you provide job_environment, it must be a dictionary')
            for key, value in job_tmpl.job_environment.items():
                lines.append('export {}={}'.format(key.strip(),
                                                   escape_for_bash(value)))
            lines.append('# ENVIRONMENT VARIABLES  END  ###')
            lines.append(empty_line)

        lines.append("cd $PBS_O_WORKDIR")
        lines.append(empty_line)

        return '\n'.join(lines)
Example #18
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        The header is a sequence of '#PBS' directives covering hold state,
        rerunnability, email notification, job name, environment import,
        output/error paths, queue, account, priority and resources,
        followed by exported environment variables and a cd to the
        submission directory.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.

        Raises:
           ValueError: if job_resource is missing, or if job_environment
               is provided but is not a dictionary.
        """
        import re
        import string

        empty_line = ''

        lines = []
        # -h submits the job in a user-hold state
        if job_tmpl.submit_as_hold:
            lines.append('#PBS -h')

        # -r y/n marks the job as rerunnable (or not) after a failure
        if job_tmpl.rerunnable:
            lines.append('#PBS -r y')
        else:
            lines.append('#PBS -r n')

        if job_tmpl.email:
            # If not specified, but email events are set, PBSPro
            # sends the mail to the job owner by default
            lines.append('#PBS -M {}'.format(job_tmpl.email))

        # Build the -m mail-event string: started maps to 'b' (begin);
        # terminated maps to both 'e' (end) and 'a' (abort)
        email_events = ''
        if job_tmpl.email_on_started:
            email_events += 'b'
        if job_tmpl.email_on_terminated:
            email_events += 'ea'
        if email_events:
            lines.append('#PBS -m {}'.format(email_events))
            if not job_tmpl.email:
                _LOGGER.info(
                    'Email triggers provided to PBSPro script for job,'
                    'but no email field set; will send emails to '
                    'the job owner as set in the scheduler')
        else:
            # 'n': no mail is sent
            lines.append('#PBS -m n')

        if job_tmpl.job_name:
            # From qsub man page:
            # string, up to 15 characters in length.  It must
            # consist of an  alphabetic  or  numeric  character
            # followed  by printable, non-white-space characters.
            # Default:  if a script is used to submit the job, the job's name
            # is the name of the script.  If no script  is  used,  the  job's
            # name is "STDIN".
            #
            # I leave only letters, numbers, dots, dashes and underscores
            # Note: I don't compile the regexp, I am going to use it only once
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid character
            if not job_title or (job_title[0]
                                 not in string.ascii_letters + string.digits):
                job_title = 'j' + job_title

            # Truncate to the first 15 characters
            # Nothing is done if the string is shorter.
            job_title = job_title[:15]

            lines.append('#PBS -N {}'.format(job_title))

        # -V imports the submitter's environment into the job
        if job_tmpl.import_sys_environment:
            lines.append('#PBS -V')

        if job_tmpl.sched_output_path:
            lines.append('#PBS -o {}'.format(job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # from qsub man page:
            # 'oe': Standard error and standard output are merged  into
            #       standard output
            # 'eo': Standard error and standard output are merged  into
            #       standard error
            # 'n' : Standard error and standard output are not merged (default)
            lines.append('#PBS -j oe')
            if job_tmpl.sched_error_path:
                _LOGGER.info(
                    'sched_join_files is True, but sched_error_path is set in '
                    'PBSPro script; ignoring sched_error_path')
        else:
            if job_tmpl.sched_error_path:
                lines.append('#PBS -e {}'.format(job_tmpl.sched_error_path))

        if job_tmpl.queue_name:
            lines.append('#PBS -q {}'.format(job_tmpl.queue_name))

        # -A charges the job to the given account/project
        if job_tmpl.account:
            lines.append('#PBS -A {}'.format(job_tmpl.account))

        if job_tmpl.priority:
            # Priority of the job.  Format: host-dependent integer.  Default:
            # zero.   Range:  [-1024,  +1023] inclusive.  Sets job's Priority
            # attribute to priority.
            # TODO: Here I expect that priority is passed in the correct PBSPro
            # format. To fix.
            lines.append('#PBS -p {}'.format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError(
                'Job resources (as the num_machines) are required for the PBSPro scheduler plugin'
            )

        # Resource directives are delegated to a helper — presumably it
        # emits the '#PBS -l ...' lines; defined elsewhere in this file.
        resource_lines = self._get_resource_lines(
            num_machines=job_tmpl.job_resource.num_machines,
            num_mpiprocs_per_machine=job_tmpl.job_resource.
            num_mpiprocs_per_machine,
            num_cores_per_machine=job_tmpl.job_resource.num_cores_per_machine,
            max_memory_kb=job_tmpl.max_memory_kb,
            max_wallclock_seconds=job_tmpl.max_wallclock_seconds)

        lines += resource_lines

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # hand.

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append('# ENVIRONMENT VARIABLES BEGIN ###')
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError(
                    'If you provide job_environment, it must be a dictionary')
            for key, value in job_tmpl.job_environment.items():
                lines.append('export {}={}'.format(key.strip(),
                                                   escape_for_bash(value)))
            lines.append('# ENVIRONMENT VARIABLES  END  ###')
            lines.append(empty_line)

        # Required to change directory to the working directory, that is
        # the one from which the job was submitted
        lines.append('cd "$PBS_O_WORKDIR"')
        lines.append(empty_line)

        return '\n'.join(lines)
Example #19
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.

        TODO: truncate the title if too long
        """
        import re
        import string

        empty_line = ''

        lines = []

        # SGE provides flags for wd and cwd
        if job_tmpl.working_directory:
            lines.append('#$ -wd {}'.format(job_tmpl.working_directory))
        else:
            lines.append('#$ -cwd')

        # Enforce bash shell
        lines.append('#$ -S /bin/bash')

        if job_tmpl.submit_as_hold:
            # if isinstance(job_tmpl.submit_as_hold, str):
            lines.append('#$ -h {}'.format(job_tmpl.submit_as_hold))

        if job_tmpl.rerunnable:
            # if isinstance(job_tmpl.rerunnable, str):
            lines.append('#$ -r {}'.format(job_tmpl.rerunnable))

        if job_tmpl.email:
            # If not specified, but email events are set, PBSPro
            # sends the mail to the job owner by default
            lines.append('#$ -M {}'.format(job_tmpl.email))

        email_events = ''
        if job_tmpl.email_on_started:
            email_events += 'b'
        if job_tmpl.email_on_terminated:
            email_events += 'ea'
        if email_events:
            lines.append('#$ -m {}'.format(email_events))
            if not job_tmpl.email:
                self.logger.info(
                    'Email triggers provided to SGE script for job,'
                    'but no email field set; will send emails to '
                    'the job owner as set in the scheduler')
        else:
            lines.append('#$ -m n')

        # From the qsub man page:
        # "The name may be any arbitrary alphanumeric ASCII string, but
        # may  not contain  "\n", "\t", "\r", "/", ":", "@", "\", "*",
        # or "?"."
        if job_tmpl.job_name:
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid character
            # (the first symbol cannot be digit, at least in some versions
            #  of the scheduler)
            if not job_title or (job_title[0] not in string.ascii_letters):
                job_title = 'j' + job_title

            lines.append('#$ -N {}'.format(job_tmpl.job_name))

        if job_tmpl.import_sys_environment:
            lines.append('#$ -V')

        if job_tmpl.sched_output_path:
            lines.append('#$ -o {}'.format(job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # from qsub man page:
            # 'y': Standard error and standard output are merged  into
            #       standard output
            # 'n' : Standard error and standard output are not merged (default)
            lines.append('#$ -j y')
            if job_tmpl.sched_error_path:
                self.logger.info(
                    'sched_join_files is True, but sched_error_path is set in '
                    'SGE script; ignoring sched_error_path')
        else:
            if job_tmpl.sched_error_path:
                lines.append('#$ -e {}'.format(job_tmpl.sched_error_path))

        if job_tmpl.queue_name:
            lines.append('#$ -q {}'.format(job_tmpl.queue_name))

        if job_tmpl.account:
            lines.append('#$ -P {}'.format(job_tmpl.account))

        if job_tmpl.priority:
            # Priority of the job.  Format: host-dependent integer.  Default:
            # zero.   Range:  [-1023,  +1024].  Sets job's Priority
            # attribute to priority.
            lines.append('#$ -p {}'.format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError(
                'Job resources (as the tot_num_mpiprocs) are required for the SGE scheduler plugin'
            )
        # Setting up the parallel environment
        lines.append('#$ -pe {} {}'. \
                     format(str(job_tmpl.job_resource.parallel_env), \
                            int(job_tmpl.job_resource.tot_num_mpiprocs)))

        if job_tmpl.max_wallclock_seconds is not None:
            try:
                tot_secs = int(job_tmpl.max_wallclock_seconds)
                if tot_secs <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    'max_wallclock_seconds must be '
                    "a positive integer (in seconds)! It is instead '{}'"
                    ''.format((job_tmpl.max_wallclock_seconds)))
            hours = tot_secs // 3600
            tot_minutes = tot_secs % 3600
            minutes = tot_minutes // 60
            seconds = tot_minutes % 60
            lines.append('#$ -l h_rt={:02d}:{:02d}:{:02d}'.format(
                hours, minutes, seconds))

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # TAKEN FROM PBSPRO:
        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.
        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append('# ENVIRONMENT VARIABLES BEGIN ###')
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError(
                    'If you provide job_environment, it must be a dictionary')
            for key, value in job_tmpl.job_environment.items():
                lines.append('export {}={}'.format(key.strip(),
                                                   escape_for_bash(value)))
            lines.append('# ENVIRONMENT VARIABLES  END  ###')
            lines.append(empty_line)

        return '\n'.join(lines)
Example #20
0
 def _get_detailed_jobinfo_command(self, jobid):
     """Return the qacct command line reporting detailed accounting info for *jobid*."""
     quoted_jobid = escape_for_bash(jobid)
     return 'qacct -j {}'.format(quoted_jobid)
Example #21
0
    def _get_run_line(self, codes_info, codes_run_mode):
        """
        Return a string with the line to execute a specific code with
        specific arguments.

        :parameter codes_info: a list of aiida.common.datastructures.CodeInfo
          objects. Each contains the information needed to run the code. I.e.
          cmdline_params, stdin_name, stdout_name, stderr_name, join_files.
          See the documentation of JobTemplate and CodeInfo
        :parameter codes_run_mode: contains the information on how to launch the
          multiple codes. As described in aiida.common.datastructures.CodeRunMode


            argv: an array with the executable and the command line arguments.
              The first argument is the executable. This should contain
              everything, including the mpirun command etc.
            stdin_name: the filename to be used as stdin, relative to the
              working dir, or None if no stdin redirection is required.
            stdout_name: the filename to be used to store the standard output,
              relative to the working dir,
              or None if no stdout redirection is required.
            stderr_name: the filename to be used to store the standard error,
              relative to the working dir,
              or None if no stderr redirection is required.
            join_files: if True, stderr is redirected to stdout; the value of
              stderr_name is ignored.

        Return a string with the following format:
        [executable] [args] {[ < stdin ]} {[ < stdout ]} {[2>&1 | 2> stderr]}
        """
        from aiida.common.datastructures import CodeRunMode

        list_of_runlines = []

        for code_info in codes_info:
            command_to_exec_list = []
            for arg in code_info.cmdline_params:
                command_to_exec_list.append(escape_for_bash(arg))
            command_to_exec = ' '.join(command_to_exec_list)

            stdin_str = '< {}'.format(escape_for_bash(code_info.stdin_name)) if code_info.stdin_name else ''
            stdout_str = '> {}'.format(escape_for_bash(code_info.stdout_name)) if code_info.stdout_name else ''

            join_files = code_info.join_files
            if join_files:
                stderr_str = '2>&1'
            else:
                stderr_str = '2> {}'.format(escape_for_bash(code_info.stderr_name)) if code_info.stderr_name else ''

            output_string = ('{} {} {} {}'.format(command_to_exec, stdin_str, stdout_str, stderr_str))

            list_of_runlines.append(output_string)

        self.logger.debug('_get_run_line output: {}'.format(list_of_runlines))

        if codes_run_mode == CodeRunMode.PARALLEL:
            list_of_runlines.append('wait\n')
            return ' &\n\n'.join(list_of_runlines)

        if codes_run_mode == CodeRunMode.SERIAL:
            return '\n\n'.join(list_of_runlines)

        raise NotImplementedError('Unrecognized code run mode')
Example #22
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.
        """
        # pylint: disable=too-many-branches

        lines = []
        empty_line = ''

        # Redirecting script output on the correct files
        # Should be one of the first commands
        if job_tmpl.sched_output_path:
            lines.append('exec > {}'.format(job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # TODO: manual says:  # pylint: disable=fixme
            # By  default both standard output and standard error are directed
            # to a file of the name "slurm-%j.out", where the "%j" is replaced
            # with  the  job  allocation  number.
            # See that this automatic redirection works also if
            # I specify a different --output file
            if job_tmpl.sched_error_path:
                self.logger.info('sched_join_files is True, but sched_error_path is set; ignoring sched_error_path')
        else:
            if job_tmpl.sched_error_path:
                lines.append('exec 2> {}'.format(job_tmpl.sched_error_path))
            else:
                # To avoid automatic join of files
                lines.append('exec 2>&1')

        if job_tmpl.max_memory_kb:
            try:
                virtual_memory_kb = int(job_tmpl.max_memory_kb)
                if virtual_memory_kb <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    'max_memory_kb must be '
                    "a positive integer (in kB)! It is instead '{}'"
                    ''.format((job_tmpl.max_memory_kb))
                )
            lines.append('ulimit -v {}'.format(virtual_memory_kb))
        if not job_tmpl.import_sys_environment:
            lines.append('env --ignore-environment \\')

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append('# ENVIRONMENT VARIABLES BEGIN ###')
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError('If you provide job_environment, it must be a dictionary')
            for key, value in job_tmpl.job_environment.items():
                lines.append('export {}={}'.format(key.strip(), escape_for_bash(value)))
            lines.append('# ENVIRONMENT VARIABLES  END  ###')
            lines.append(empty_line)

        lines.append(empty_line)

        ## The following code is not working as there's an empty line
        ## inserted between the header and the actual command.
        # if job_tmpl.max_wallclock_seconds is not None:
        #     try:
        #         tot_secs = int(job_tmpl.max_wallclock_seconds)
        #         if tot_secs <= 0:
        #             raise ValueError
        #     except ValueError:
        #         raise ValueError(
        #             "max_wallclock_seconds must be "
        #             "a positive integer (in seconds)! It is instead '{}'"
        #             "".format((job_tmpl.max_wallclock_seconds)))
        #     lines.append("timeout {} \\".format(tot_secs))

        return '\n'.join(lines)