Ejemplo n.º 1
0
    def __init__(self):
        """Probe the Tmod installation and set up the module command.

        Runs ``modulecmd -V`` and looks for the ``VERSION=`` and
        ``TCL_VERSION=`` lines in its standard output, stores the version
        in ``self._version``, and then runs ``modulecmd python`` to check
        that the Python bindings are accepted.

        :raises ReframeError: if either command cannot be run, if the
            version information is missing from the output, or if
            ``modulecmd`` reports ``Unknown shell type`` for Python.
        """
        # Try to figure out if we are indeed using the TCL version
        try:
            completed = os_ext.run_command('modulecmd -V')
        except OSError as e:
            raise ReframeError('could not find a sane Tmod installation: %s' %
                               e)

        version_match = re.search(r'^VERSION=(\S+)', completed.stdout,
                                  re.MULTILINE)
        tcl_version_match = re.search(r'^TCL_VERSION=(\S+)', completed.stdout,
                                      re.MULTILINE)

        if version_match is None or tcl_version_match is None:
            raise ReframeError('could not find a sane Tmod installation')

        self._version = version_match.group(1)
        self._command = 'modulecmd python'
        try:
            # Try the Python bindings now
            completed = os_ext.run_command(self._command)
        except OSError as e:
            # The message needs a '%s' placeholder; without it the `%`
            # operator raises TypeError and masks the real error.
            raise ReframeError(
                'could not get the Python bindings for Tmod: %s' % e)

        if re.search(r'Unknown shell type', completed.stderr):
            raise ReframeError(
                'Python is not supported by this Tmod installation')
Ejemplo n.º 2
0
    def cancel(self):
        """Cancel the job by running ``scancel`` on its job id.

        Logs the attempt, raises ``ReframeError`` if no job id has been
        recorded yet, and marks the job as being cancelled once the
        command has been issued.
        """
        job_id = self._jobid
        getlogger().debug('cancelling job (id=%s)' % job_id)
        if job_id is None:
            raise ReframeError('no job is spawned yet')

        scancel_cmd = 'scancel %s' % job_id
        os_ext.run_command(scancel_cmd, check=True,
                           timeout=settings.job_submit_timeout)
        self._is_cancelling = True
Ejemplo n.º 3
0
    def _update_state(self):
        """Refresh the job state and exit code from the output of ``squeue``.

        Sleeps first if fewer than ``self.squeue_delay`` seconds have
        passed since ``self.submit_time``. Empty ``squeue`` output is taken
        to mean that the job is no longer in the queue.
        """
        elapsed = (datetime.now() - self.submit_time).total_seconds()
        remaining = self.squeue_delay - elapsed
        if remaining > 0:
            time.sleep(remaining)

        # check=True is deliberately not used here: if the job has finished
        # already, squeue might return an error about an invalid job id.
        squeue_cmd = ('squeue -h -j %s -O state,exit_code,reason' %
                      self._jobid)
        fields = os_ext.run_command(squeue_cmd).stdout.strip()
        if fields:
            # There is no reliable way to get the exit code, so it is always
            # captured, in case the actual value shows up while the job has
            # finished but is still listed in the queue (e.g., when it is
            # 'COMPLETING')
            state, exitcode, reason = fields.split(maxsplit=2)
            self._state = SlurmJobState(state)
            self._exitcode = int(exitcode)
            if self._is_cancelling or self._state not in self._pending_states:
                return

            self._check_and_cancel(reason)
            return

        # No output: assume that the job has finished
        if self._cancelled:
            self._state = SLURM_JOB_CANCELLED
        else:
            self._state = SLURM_JOB_COMPLETED

        # Set the exit code manually, if not set already by the polling
        if self._exitcode is None:
            self._exitcode = 0
Ejemplo n.º 4
0
    def _cancel_if_blocked(self):
        """Ask ``squeue`` for the job's reason and hand it over for checking.

        Does nothing when the job is already being cancelled or when its
        state is not one of the pending states.
        """
        if not self._is_cancelling and self._state in self._pending_states:
            reason_cmd = 'squeue -h -j %s -o %%r' % self._jobid
            reason = os_ext.run_command(reason_cmd, check=True).stdout

            # Empty output means the job's state can't be retrieved; perhaps
            # it has finished already and no longer shows up in squeue.
            if reason:
                self._check_and_cancel(reason)
Ejemplo n.º 5
0
 def submit(self):
     """Submit the job script with ``sbatch`` and record the job id.

     The job id is parsed from the ``Submitted batch job <id>`` line of
     the command's standard output and stored as an integer in
     ``self._jobid``.

     :raises JobSubmissionError: if the job id cannot be found in the
         output of ``sbatch``.
     """
     cmd = 'sbatch %s' % self.script_filename
     completed = os_ext.run_command(cmd,
                                    check=True,
                                    timeout=settings.job_submit_timeout)
     # Raw string: '\d' inside a plain literal is an invalid escape
     # sequence, which newer Python versions warn about.
     jobid_match = re.search(r'Submitted batch job (?P<jobid>\d+)',
                             completed.stdout)
     if not jobid_match:
         raise JobSubmissionError(command=cmd,
                                  stdout=completed.stdout,
                                  stderr=completed.stderr,
                                  exitcode=completed.returncode)
     self._jobid = int(jobid_match.group('jobid'))
Ejemplo n.º 6
0
    def _compile_file(self, source_file, executable, lang, options):
        """Compile a single source file into an executable.

        :arg source_file: path of the source file to compile.
        :arg executable: path of the output file; if falsy, it defaults to
            ``source_file`` without its last extension.
        :arg lang: one of ``'C'``, ``'C++'``, ``'Fortran'`` or ``'CUDA'``;
            if falsy, it is taken from ``self.guess_language(source_file)``.
        :arg options: extra text appended at the end of the command line.
        :returns: whatever ``os_ext.run_command()`` returns for the
            compilation command.
        :raises ReframeError: if the language is not one of those above.
        :raises CompilationError: if running the command raises
            ``CommandError``.
        """
        if not executable:
            # Default executable: the source file without its extension.
            # splitext() strips only the last extension and keeps the
            # directory part, so it works for names with several dots,
            # names without an extension and relative paths.
            executable = os.path.splitext(source_file)[0]

        if not lang:
            lang = self.guess_language(source_file)

        # Replace None's with empty strings
        cppflags = self.cppflags or ''
        cflags = self.cflags or ''
        cxxflags = self.cxxflags or ''
        fflags = self.fflags or ''
        ldflags = self.ldflags or ''

        # Preprocessor flags always come first, followed by the flags of
        # the selected language
        flags = [cppflags]
        if lang == 'C':
            compiler = self.cc
            flags.append(cflags)
        elif lang == 'C++':
            compiler = self.cxx
            flags.append(cxxflags)
        elif lang == 'Fortran':
            compiler = self.ftn
            flags.append(fflags)
        elif lang == 'CUDA':
            # CUDA sources are compiled with nvcc using the C++ flags
            compiler = 'nvcc'
            flags.append(cxxflags)
        else:
            raise ReframeError('Unknown language')

        # Append include search path
        flags += ['-I' + d for d in self.include_search_path]
        cmd = ('%s %s %s -o %s %s %s' % (compiler, ' '.join(flags),
                                         source_file, executable,
                                         ldflags, options))
        try:
            return os_ext.run_command(cmd, check=True)
        except CommandError as e:
            raise CompilationError(command=e.command,
                                   stdout=e.stdout,
                                   stderr=e.stderr,
                                   exitcode=e.exitcode,
                                   environ=self)
Ejemplo n.º 7
0
    def _update_state(self):
        """Update the job state, and possibly its exit code, from ``sacct``.

        If no job record can be matched in the output, the output is logged
        at debug level and nothing is updated.
        """
        today = datetime.now().strftime('%F')
        sacct_cmd = ('sacct -S %s -P -j %s -o jobid,state,exitcode' %
                     (today, self._jobid))
        sacct_out = os_ext.run_command(sacct_cmd, check=True).stdout

        record_patt = (r'^(?P<jobid>\d+)\|(?P<state>\S+)([^\|]*)\|'
                       r'(?P<exitcode>\d+)\:(?P<signal>\d+)')
        record = re.search(record_patt, sacct_out, re.MULTILINE)
        if record is None:
            getlogger().debug('job state not matched (stdout follows)\n%s' %
                              sacct_out)
            return

        self._state = SlurmJobState(record.group('state'))
        self._cancel_if_blocked()

        # The exit code is recorded only once the job is in a completion state
        if self._state in self._completion_states:
            self._exitcode = int(record.group('exitcode'))
Ejemplo n.º 8
0
def autodetect_system(site_config):
    """Auto-detect the current system from its hostname.

    The hostname is read from ``/etc/xthostname`` if that file can be read;
    otherwise ``socket.gethostname()`` is used. It is then matched, with
    ``re.match()``, against the hostname patterns of every system in
    ``site_config.systems``.

    :arg site_config: object whose ``systems`` attribute is a mapping; each
        value must provide a ``hostnames`` iterable of regex patterns.
    :returns: the first system with a matching hostname pattern, or
        ``None`` if no system matches.
    """
    import re
    import socket

    # Try to detect directly the cluster name from /etc/xthostname (Cray
    # specific). The file is read directly rather than through `cat`, so no
    # subprocess is needed; the trailing newline is stripped.
    try:
        with open('/etc/xthostname') as fp:
            hostname = fp.read().strip()
    except OSError:
        # Try to figure it out with the standard method
        hostname = socket.gethostname()

    # Go through the supported systems and try to match the hostname
    for system in site_config.systems.values():
        for hostname_patt in system.hostnames:
            if re.match(hostname_patt, hostname):
                return system

    return None
Ejemplo n.º 9
0
    def _compile_dir(self, source_dir, makefile, options):
        """Build a source directory by running ``make`` in it.

        :arg source_dir: directory passed to ``make -C``.
        :arg makefile: makefile passed to ``make -f``; omitted if falsy.
        :arg options: extra text placed on the ``make`` command line.
        :returns: whatever ``os_ext.run_command()`` returns for the command.
        :raises CompilationError: if running the command raises
            ``CommandError``.
        """
        cmd = ('make -C %s -f %s %s ' % (source_dir, makefile, options)
               if makefile else
               'make -C %s %s ' % (source_dir, options))

        # Pass a set of predefined options to the Makefile
        if self.propagate:
            make_vars = ["CC='%s'" % self.cc,
                         "CXX='%s'" % self.cxx,
                         "FC='%s'" % self.ftn]

            # Flags are compared explicitly against None; an empty value is
            # still passed on, since the user may want to clear the flags
            optional_flags = (
                ("CPPFLAGS='%s'", self.cppflags),
                ("CFLAGS='%s'", self.cflags),
                ("CXXFLAGS='%s'", self.cxxflags),
                ("FFLAGS='%s'", self.fflags),
                ("LDFLAGS='%s'", self.ldflags),
            )
            make_vars.extend(template % value
                             for template, value in optional_flags
                             if value is not None)
            cmd += ' '.join(make_vars)

        try:
            return os_ext.run_command(cmd, check=True)
        except CommandError as err:
            raise CompilationError(command=err.command,
                                   stdout=err.stdout,
                                   stderr=err.stderr,
                                   exitcode=err.exitcode,
                                   environ=self)
Ejemplo n.º 10
0
 def test_command_timeout(self):
     # A command that outlives its timeout must raise CommandError, and the
     # exception must carry the timeout that was requested.
     try:
         os_ext.run_command('sleep 3', timeout=2)
     except CommandError as err:
         self.assertEqual(err.timeout, 2)
     else:
         self.fail('Expected timeout')
Ejemplo n.º 11
0
 def test_command_success(self):
     # A successful command reports a zero return code and its captured
     # standard output.
     result = os_ext.run_command('echo foobar')
     self.assertEqual(result.returncode, 0)
     self.assertEqual(result.stdout, 'foobar\n')
Ejemplo n.º 12
0
 def _run_module_command(self, *args):
     """Run the module command followed by ``args`` and return the result.

     The base command (``self._command``) and the arguments are joined
     with single spaces into one command line.
     """
     command_line = ' '.join((self._command,) + args)
     return os_ext.run_command(command_line)