Example #1
0
    def __init__(self):
        """Detect the Lmod installation and check its Python bindings.

        The Lmod executable is taken from the ``LMOD_CMD`` environment
        variable and its version is parsed from the standard error of
        ``$LMOD_CMD --version``.

        Raises:
            ConfigError: if ``LMOD_CMD`` is not defined, if Lmod cannot be
                run, if no version can be found in its output, or if it
                reports ``Unknown shell type`` when the command returned by
                ``self.modulecmd()`` is run.
        """
        # Try to figure out if we are indeed using LMOD
        self._lmod_cmd = os.getenv('LMOD_CMD')
        if self._lmod_cmd is None:
            raise ConfigError('could not find a sane Lmod installation: '
                              'environment variable LMOD_CMD is not defined')

        try:
            completed = osext.run_command(f'{self._lmod_cmd} --version')
        except OSError as e:
            raise ConfigError(
                'could not find a sane Lmod installation: %s' % e) from e

        # The version banner is looked up in standard error
        version_match = re.search(r'.*Version\s*(\S+)', completed.stderr,
                                  re.MULTILINE)
        if version_match is None:
            raise ConfigError('could not retrieve Lmod version')

        self._version = version_match.group(1)
        try:
            # Try the Python bindings now
            completed = osext.run_command(self.modulecmd())
        except OSError as e:
            # The format string needs a `%s` placeholder; without it the `%`
            # operator itself raises a TypeError that hides the real error
            raise ConfigError(
                'could not get the Python bindings for Lmod: %s' % e) from e

        if re.search(r'Unknown shell type', completed.stderr):
            raise ConfigError('Python is not supported by '
                              'this Lmod installation')

        self._extra_module_paths = []
Example #2
0
def git_only():
    """Skip the calling test unless git works and we are inside a repo.

    Each probe command is run in turn; the first one that fails (or whose
    executable cannot be found) skips the test with the matching reason.
    """
    probes = (
        ('git --version', 'no git installation found on system'),
        ('git status', 'not inside a git repository'),
    )
    for command, skip_reason in probes:
        try:
            osext.run_command(command, check=True, log=False)
        except (SpawnedProcessError, FileNotFoundError):
            pytest.skip(skip_reason)
Example #3
0
def test_command_timeout():
    """A command outliving its timeout raises SpawnedProcessTimeout."""
    expected_msg = r"command 'sleep 3' timed out after 2s"
    with pytest.raises(SpawnedProcessTimeout, match=expected_msg) as exc_info:
        osext.run_command('sleep 3', timeout=2)

    timeout_error = exc_info.value
    assert timeout_error.timeout == 2

    # Converting the exception to a string must work as well: see bug #658
    str(timeout_error)
Example #4
0
def test_trap_error(script_file):
    """With error trapping on, the script stops at the first failure."""
    with shell.generate_script(script_file, trap_errors=True) as gen:
        for line in ('false', 'echo hello'):
            gen.write(line)

    with pytest.raises(SpawnedProcessError) as cm:
        osext.run_command(str(script_file), check=True)

    failure = cm.value

    # The command following the failing one must not have run
    assert 'hello' not in failure.stdout
    assert failure.exitcode == 1
    assert ("-reframe: command `false' failed (exit code: 1)"
            in failure.stdout)
Example #5
0
    def __init__(self):
        """Detect the TMod4 installation and validate its version.

        The version is parsed from the standard error of the command
        returned by ``self.modulecmd('-V')``.

        Raises:
            ConfigError: if that command cannot be run or fails, if its
                output does not start with a ``Modules Release`` banner, if
                the version cannot be parsed as ``major.minor`` integers or
                if it is lower than ``self.MIN_VERSION``.
        """
        try:
            probe = osext.run_command(self.modulecmd('-V'), check=True)
        except OSError as err:
            raise ConfigError(
                'could not find a sane TMod4 installation') from err
        except SpawnedProcessError as err:
            raise ConfigError(
                'could not get the Python bindings for TMod4') from err

        banner = re.match(r'^Modules Release (\S+)\s+', probe.stderr)
        if not banner:
            raise ConfigError('could not retrieve the TMod4 version')

        version = banner.group(1)
        leading_fields = version.split('.')[:2]
        try:
            # Fails both for non-numeric fields and for fewer than two fields
            ver_major, ver_minor = [int(field) for field in leading_fields]
        except ValueError:
            raise ConfigError(
                'could not parse TMod4 version string: ' + version) from None

        too_old = (ver_major, ver_minor) < self.MIN_VERSION
        if too_old:
            raise ConfigError(
                'unsupported TMod4 version: %s (required >= %s)' %
                (version, self.MIN_VERSION))

        self._extra_module_paths = []
        self._version = version
def _cray_cle_version():
    """Return the Cray CLE release, or ``None`` if it cannot be found.

    The release is the value of the ``RELEASE=`` entry at the very start of
    the output of ``cat /etc/opt/cray/release/cle-release``.
    """
    release_file = osext.run_command(
        'cat /etc/opt/cray/release/cle-release'
    )
    release = re.match(r'^RELEASE=(\S+)', release_file.stdout)
    return release.group(1) if release is not None else None
Example #7
0
def test_command_stdin(tmp_path):
    """A file object passed as ``stdin`` is fed to the spawned command."""
    input_file = tmp_path / 'in.txt'
    with open(input_file, 'w') as writer:
        writer.write('hello')

    with open(input_file) as reader:
        result = osext.run_command('cat', stdin=reader)

    assert result.stdout == 'hello'
Example #8
0
def test_trap_exit(script_file):
    """With exit trapping on, the script reports its exit code on exit."""
    with shell.generate_script(script_file, trap_exit=True) as gen:
        gen.write('echo hello')

    result = osext.run_command(str(script_file), check=True)
    output = result.stdout
    assert 'hello' in output
    assert result.returncode == 0
    assert '-reframe: script exiting with exit code: 0' in output
Example #9
0
    def _execute(self, cmd, *args):
        """Run a module command and execute the Python code it prints.

        The command line is built by ``self.modulecmd(cmd, *args)``.  Its
        standard output is passed through ``self.process()`` and the result
        is executed with ``exec()``.

        Returns:
            The standard error of the spawned command.

        Raises:
            SpawnedProcessError: if the word ``ERROR`` appears in the
                standard error of the spawned command.
        """
        modulecmd = self.modulecmd(cmd, *args)
        completed = osext.run_command(modulecmd)

        # Failure is detected from the text written to standard error
        if re.search(r'\bERROR\b', completed.stderr) is not None:
            raise SpawnedProcessError(modulecmd, completed.stdout,
                                      completed.stderr, completed.returncode)

        # NOTE(review): this executes whatever the modules tool printed, in
        # this frame's namespace; presumably the tool's output is trusted --
        # confirm
        exec(self.process(completed.stdout))
        return completed.stderr
Example #10
0
    def __init__(self):
        """Detect the Spack installation and record its version.

        The version is the stripped standard output of ``spack -V``.

        Raises:
            ConfigError: if the ``spack`` command cannot be run.
        """
        # Probe for Spack by asking it for its version
        try:
            probe = osext.run_command('spack -V')
        except OSError as err:
            raise ConfigError(
                'could not find a sane Spack installation') from err

        self._name_format = '{name}/{version}-{hash}'
        self._version = probe.stdout.strip()
Example #11
0
    def __init__(self):
        """Detect the TMod 3.1 installation and check its Python bindings.

        The ``modulecmd.tcl`` script is looked up under the directory named
        by the ``MODULESHOME`` environment variable and the version is parsed
        from the ``Release Tcl`` banner it prints on standard error.

        Raises:
            ConfigError: if ``MODULESHOME`` is not defined, if
                ``modulecmd.tcl`` cannot be run, if no version banner is
                found, if the version cannot be parsed as ``major.minor``
                integers, if it is lower than ``self.MIN_VERSION`` or if
                ``Unknown shell type`` is reported for the Python shell.
        """
        # Try to figure out if we are indeed using the TCL version
        modules_home = os.getenv('MODULESHOME')
        if modules_home is None:
            # os.path.join() would fail with a TypeError on None; report this
            # like every other detection failure in this method instead
            raise ConfigError(
                'could not find a sane TMod31 installation: '
                'environment variable MODULESHOME is not defined'
            )

        modulecmd = os.path.join(modules_home, 'modulecmd.tcl')
        try:
            completed = osext.run_command(modulecmd)
        except OSError as e:
            raise ConfigError(
                'could not find a sane TMod31 installation: %s' % e) from e

        version_match = re.search(r'Release Tcl (\S+)', completed.stderr,
                                  re.MULTILINE)
        if version_match is None:
            raise ConfigError('could not find a sane TMod31 installation')

        version = version_match.group(1)
        try:
            # Fails both for non-numeric fields and for fewer than two fields
            ver_major, ver_minor = [int(v) for v in version.split('.')[:2]]
        except ValueError:
            raise ConfigError(
                'could not parse TMod31 version string: ' + version) from None

        if (ver_major, ver_minor) < self.MIN_VERSION:
            raise ConfigError(
                'unsupported TMod version: %s (required >= %s)' %
                (version, self.MIN_VERSION))

        self._version = version
        self._command = '%s python' % modulecmd

        try:
            # Try the Python bindings now
            completed = osext.run_command(self._command)
        except OSError as e:
            # The format string needs a `%s` placeholder; without it the `%`
            # operator itself raises a TypeError that hides the real error
            raise ConfigError(
                'could not get the Python bindings for TMod31: %s' % e
            ) from e

        if re.search(r'Unknown shell type', completed.stderr):
            raise ConfigError(
                'Python is not supported by this TMod installation')
0
    def __init__(self):
        """Detect the TMod installation and check its Python bindings.

        The version is parsed from the ``VERSION=`` entry in the standard
        output of ``modulecmd -V``; a ``TCL_VERSION=`` entry must be present
        as well.

        Raises:
            ConfigError: if ``modulecmd`` cannot be run, if either entry is
                missing, if the version cannot be parsed as ``major.minor``
                integers, if it is lower than ``self.MIN_VERSION`` or if
                ``Unknown shell type`` is reported when the command returned
                by ``self.modulecmd()`` is run.
        """
        # Try to figure out if we are indeed using the TCL version
        try:
            completed = osext.run_command('modulecmd -V')
        except OSError as e:
            raise ConfigError(
                'could not find a sane TMod installation') from e

        version_match = re.search(r'^VERSION=(\S+)', completed.stdout,
                                  re.MULTILINE)
        tcl_version_match = re.search(r'^TCL_VERSION=(\S+)', completed.stdout,
                                      re.MULTILINE)

        if version_match is None or tcl_version_match is None:
            raise ConfigError('could not find a sane TMod installation')

        version = version_match.group(1)
        try:
            # Fails both for non-numeric fields and for fewer than two fields
            ver_major, ver_minor = [int(v) for v in version.split('.')[:2]]
        except ValueError:
            raise ConfigError(
                'could not parse TMod version string: ' + version) from None

        if (ver_major, ver_minor) < self.MIN_VERSION:
            raise ConfigError(
                'unsupported TMod version: %s (required >= %s)' %
                (version, self.MIN_VERSION))

        self._version = version
        try:
            # Try the Python bindings now
            completed = osext.run_command(self.modulecmd())
        except OSError as e:
            # The format string needs a `%s` placeholder; without it the `%`
            # operator itself raises a TypeError that hides the real error
            raise ConfigError(
                'could not get the Python bindings for TMod: %s' % e) from e

        if re.search(r'Unknown shell type', completed.stderr):
            raise ConfigError(
                'Python is not supported by this TMod installation')
Example #13
0
    def _execute(self, cmd, *args):
        """Run a module command and execute the Python code it prints.

        The processed standard output of the command is executed in a
        scratch namespace, which is then inspected for ``_mlstatus``.

        Returns:
            The standard error of the spawned command.

        Raises:
            SpawnedProcessError: if the executed code sets ``_mlstatus`` to
                a false value.
        """
        command = self.modulecmd(cmd, *args)
        result = osext.run_command(command, check=False)
        scope = {}
        exec(self.process(result.stdout), {}, scope)

        # TMod4 sets _mlstatus only when the command was unsuccessful, whereas
        # Lmod always sets it; a missing value therefore counts as success
        if scope.get('_mlstatus', True):
            return result.stderr

        raise SpawnedProcessError(command, result.stdout,
                                  result.stderr, result.returncode)
Example #14
0
    def poll(self, *jobs):
        """Update the state of the given jobs from the output of ``squeue``.

        ``None`` entries in ``jobs`` are ignored.  Before querying, this
        waits until at least ``self.SQUEUE_DELAY`` seconds have passed since
        the most recent submission among the jobs.

        A job that ``squeue`` does not list is marked ``CANCELLED`` if it is
        being cancelled and ``COMPLETED`` otherwise.  For a listed job, the
        states of all its entries are joined with ``,`` and the job is then
        checked for cancellation due to blocking or pending for too long.
        """
        if jobs:
            # Filter out non-jobs
            jobs = [job for job in jobs if job is not None]

        if not jobs:
            return

        m = max(job.submit_time for job in jobs)
        time_from_last_submit = time.time() - m
        rem_wait = self.SQUEUE_DELAY - time_from_last_submit
        if rem_wait > 0:
            time.sleep(rem_wait)

        # We don't run the command with check=True, because if the job has
        # finished already, squeue might return an error about an invalid
        # job id.
        completed = osext.run_command(
            f'squeue -h -j {",".join(job.jobid for job in jobs)} '
            f'-o "%%i|%%T|%%N|%%r"')

        # We need the match objects, so we have to use finditer()
        state_match = list(
            re.finditer(
                fr'^(?P<jobid>{self._jobid_patt})\|(?P<state>\S+)\|'
                fr'(?P<nodespec>\S*)\|(?P<reason>.+)', completed.stdout,
                re.MULTILINE))

        # Group the matches per job; only the part of the reported id before
        # the first '_' is used as the key
        jobinfo = {}
        for s in state_match:
            jobid = s.group('jobid').split('_')[0]
            jobinfo.setdefault(jobid, []).append(s)

        for job in jobs:
            try:
                job_match = jobinfo[job.jobid]
            except KeyError:
                job._state = 'CANCELLED' if job.is_cancelling else 'COMPLETED'
                continue

            # Join the states with ',' in case of job arrays
            job._state = ','.join(s.group('state') for s in job_match)

            # Only this job's own reasons are relevant here; passing the
            # reasons of every polled job would let a blocked job cause the
            # cancellation of unrelated ones
            self._cancel_if_blocked(job,
                                    [s.group('reason') for s in job_match])
            self._cancel_if_pending_too_long(job)
Example #15
0
    def _execute(self, cmd, *args):
        """Run a module command and execute the Python file it points to.

        The command line is built by ``self.modulecmd(cmd, *args)``.  Its
        standard output is searched for a line of the form ``exec '<path>'``;
        the contents of that file are passed through ``self.process()`` and
        executed with ``exec()``.

        Returns:
            The standard error of the spawned command.

        Raises:
            SpawnedProcessError: if the word ``ERROR`` appears in the
                standard error of the spawned command.
            ConfigError: if no ``exec '<path>'`` line is found in the
                standard output.
        """
        modulecmd = self.modulecmd(cmd, *args)
        completed = osext.run_command(modulecmd)

        # Failure is detected from the text written to standard error
        if re.search(r'\bERROR\b', completed.stderr) is not None:
            raise SpawnedProcessError(modulecmd, completed.stdout,
                                      completed.stderr, completed.returncode)

        # The path must not contain whitespace for this pattern to match
        exec_match = re.search(r"^exec\s'(\S+)'", completed.stdout,
                               re.MULTILINE)
        if exec_match is None:
            raise ConfigError('could not use the python bindings')

        with open(exec_match.group(1), 'r') as content_file:
            cmd = content_file.read()

        # NOTE(review): this executes the contents of a file named by the
        # modules tool, in this frame's namespace; presumably the tool's
        # output is trusted -- confirm
        exec(self.process(cmd))
        return completed.stderr
Example #16
0
def test_command_success():
    """A successful command string reports exit code 0 and its output."""
    result = osext.run_command('echo foobar')
    assert (result.returncode, result.stdout) == (0, 'foobar\n')
Example #17
0
def _sysctl_topo():
    """Build the processor topology from ``sysctl hw machdep.cpu``.

    Returns:
        A dictionary with the keys ``topology``, ``num_sockets``,
        ``num_cpus``, ``num_cpus_per_socket`` and ``num_cpus_per_core``.
        An empty dictionary is returned if ``sysctl`` cannot be run or fails,
        or if it does not report a non-zero number of CPUs, physical cores
        and packages.  Cache entries are left out if the cache line size is
        not reported.
    """
    try:
        exec_output = osext.run_command('sysctl hw machdep.cpu', check=True)
    except (FileNotFoundError, SpawnedProcessError):
        return {}

    output = exec_output.stdout

    def int_field(name_patt):
        # Value of the `<name>: <int>` entry or None if it is not reported
        found = re.search(rf'{name_patt}: (?P<value>\d+)', output)
        return int(found.group('value')) if found else None

    def int_list_field(name_patt):
        # Values of the `<name>: <int> <int> ...` entry; empty if missing
        found = re.search(rf'{name_patt}:(?P<values>(\s\d+)*)', output)
        if not found:
            return []

        return [int(v) for v in found.group('values').split()]

    num_cpus = int_field(r'hw\.ncpu')
    num_cores = int_field(r'hw\.physicalcpu')
    num_sockets = int_field(r'hw\.packages')
    if not (num_cpus and num_cores and num_sockets):
        # The topology cannot be derived without these counts and a zero
        # would lead to a division by zero below
        return {}

    cacheconfig = int_list_field(r'hw\.cacheconfig')
    cachesize = int_list_field(r'hw\.cachesize')
    linesize = int_field(r'hw\.cachelinesize')

    caches = []
    if linesize is not None:
        # Index 0 is referring to memory, so cache levels start from 1; only
        # levels present in both lists can be described
        for i in range(1, min(len(cachesize), len(cacheconfig))):
            if cachesize[i] == 0 or cacheconfig[i] == 0:
                break

            found = re.search(
                rf'machdep\.cpu\.cache\.L{i}_associativity: '
                rf'(?P<associativity>\d+)', output)
            caches.append({
                'type': f'L{i}',
                'size': cachesize[i],
                'linesize': linesize,
                'associativity': (int(found.group('associativity'))
                                  if found else 0),
                'num_cpus': cacheconfig[i],
                'cpusets': [
                    _str_from_bits(range(start, start + cacheconfig[i]))
                    for start in range(0, num_cpus, cacheconfig[i])
                ]
            })

    num_cpus_per_socket = num_cpus // num_sockets
    num_cpus_per_core = num_cpus // num_cores

    # Fill in the cpuinfo
    cpuinfo = {'topology': {}}
    cpuinfo['num_sockets'] = num_sockets
    cpuinfo['num_cpus'] = num_cpus
    cpuinfo['num_cpus_per_socket'] = num_cpus_per_socket
    cpuinfo['num_cpus_per_core'] = num_cpus_per_core
    cpuinfo['topology']['numa_nodes'] = [_str_from_bits(range(num_cpus))]
    cpuinfo['topology']['sockets'] = [
        _str_from_bits(range(start, start + num_cpus_per_socket))
        for start in range(0, num_cpus, num_cpus_per_socket)
    ]
    cpuinfo['topology']['cores'] = [
        _str_from_bits(range(start, start + num_cpus_per_core))
        for start in range(0, num_cpus, num_cpus_per_core)
    ]
    cpuinfo['topology']['caches'] = caches
    return cpuinfo
Example #18
0
 def _execute(self, cmd, *args):
     """Run a module command, requiring success, and return its stdout."""
     command = self.modulecmd(cmd, *args)
     return osext.run_command(command, check=True).stdout
Example #19
0
    def poll(self, *jobs):
        """Update the state of the given jobs from ``qstat -xml``.

        ``None`` entries in ``jobs`` are ignored.  Jobs of the current user
        that appear in the output get their state mapped from the SGE state
        code; a job that does not appear at all is assumed to have completed.

        Raises:
            JobSchedulerError: if ``qstat`` exits with a non-zero code.
        """
        # Drop the non-jobs; there is nothing to do if no job is left
        jobs = [job for job in jobs if job is not None]
        if not jobs:
            return

        user = osext.osuser()
        completed = osext.run_command(f'qstat -xml -u {user}')
        if completed.returncode != 0:
            raise JobSchedulerError(
                f'qstat failed with exit code {completed.returncode} '
                f'(standard error follows):\n{completed.stderr}')

        # Index the jobs to poll on their jobid
        jobs_to_poll = {job.jobid: job for job in jobs}

        # Parse the XML
        root = ET.fromstring(completed.stdout)

        # For the list of known statuses see `man 5 sge_status`
        # (https://arc.liv.ac.uk/SGE/htmlman/htmlman5/sge_status.html)
        codes_by_state = (
            ('RUNNING', ('r', 'hr', 't', 'Rr', 'Rt')),
            ('PENDING', ('qw', 'Rq', 'hqw', 'hRwq')),
            ('SUSPENDED', ('s', 'ts', 'S', 'tS', 'T', 'tT', 'Rs', 'Rts',
                           'RS', 'RtS', 'RT', 'RtT')),
            ('ERROR', ('Eqw', 'Ehqw', 'EhRqw')),
            ('DELETING', ('dr', 'dt', 'dRr', 'dRt', 'ds', 'dS', 'dT',
                          'dRs', 'dRS', 'dRT')),
            ('COMPLETED', ('z',)),
        )
        state_of_code = {
            code: name for name, codes in codes_by_state for code in codes
        }

        # Walk the XML and update the jobs relevant to ReFrame; the element
        # names are those of SGE's XML output
        known_jobs = set()  # jobs known to the SGE scheduler
        for queue_info in root:
            if queue_info is None:
                raise JobSchedulerError('could not retrieve queue information')

            for job_list in queue_info:
                # Skip jobs of other users
                if job_list.find("JB_owner").text != user:
                    continue

                # Skip jobs that we were not asked to poll
                jobid = job_list.find("JB_job_number").text
                if jobid not in jobs_to_poll:
                    continue

                state = job_list.find("state").text
                job = jobs_to_poll[jobid]
                known_jobs.add(job)

                # An unrecognized state code leaves the job state untouched
                new_state = state_of_code.get(state)
                if new_state is not None:
                    job._state = new_state

        # Mark any "unknown" job as completed
        for job in set(jobs) - known_jobs:
            self.log(f'Job {job.jobid} not known to scheduler, '
                     f'assuming job completed')
            job._state = 'COMPLETED'
Example #20
0
 def _get_nodes_by_name(self, nodespec):
     """Create nodes from ``scontrol``'s one-line-per-node description."""
     scontrol = osext.run_command('scontrol -a show -o node %s' % nodespec)
     return _create_nodes(scontrol.stdout.splitlines())
Example #21
0
def test_command_success_cmd_seq():
    """A command given as an argument list runs successfully as well."""
    result = osext.run_command(['echo', 'foobar'])
    assert (result.returncode, result.stdout) == (0, 'foobar\n')
Example #22
0
    def poll(self, *jobs):
        """Update the state of the given jobs from ``qstat -f``.

        ``None`` entries in ``jobs`` are ignored.  A job is flagged as
        finished (``_completed``) only once it is cancelled or both of its
        output files exist in its working directory.  Jobs pending for at
        least their ``max_pending_time`` are cancelled.

        Raises:
            JobSchedulerError: if ``qstat`` exits with a code other than
                0, 153 or 35.
        """
        def output_ready(job):
            # A job counts as finished only after both of its output files
            # have been written back to the working directory
            stdout = os.path.join(job.workdir, job.stdout)
            stderr = os.path.join(job.workdir, job.stderr)
            return all(map(os.path.exists, (stdout, stderr)))

        def assume_completed(job):
            # Handle a job that qstat does not list
            job._state = 'COMPLETED'
            if job.cancelled or output_ready(job):
                self.log(f'Assuming job {job.jobid} completed')
                job._completed = True

        # Drop the non-jobs; there is nothing to do if no job is left
        jobs = [job for job in jobs if job is not None]
        if not jobs:
            return

        jobids = " ".join(job.jobid for job in jobs)
        completed = osext.run_command(f'qstat -f {jobids}')
        retcode = completed.returncode

        # Depending on the configuration, completed jobs either stay on the
        # job list for a limited time or are removed upon completion.  If
        # qstat cannot find any of the job IDs, it returns 153 (35 is handled
        # the same way); otherwise it returns 0 and prints information only
        # for the jobs it could find.
        if retcode in (153, 35):
            self.log(f'Return code is {retcode}')
            for job in jobs:
                assume_completed(job)

            return

        if retcode != 0:
            raise JobSchedulerError(
                f'qstat failed with exit code {retcode} '
                f'(standard error follows):\n{completed.stderr}')

        # Split the output in per-job records, keyed by job id
        jobinfo = {}
        for record in completed.stdout.split('\n\n'):
            header = re.search(r'^Job Id:\s*(?P<jobid>\S+)', record,
                               re.MULTILINE)
            if header:
                jobinfo[header.group('jobid')] = record

        for job in jobs:
            info = jobinfo.get(job.jobid)
            if info is None:
                self.log(f'Job {job.jobid} not known to scheduler')
                assume_completed(job)
                continue

            state_match = re.search(r'^\s*job_state = (?P<state>[A-Z])', info,
                                    re.MULTILINE)
            if not state_match:
                self.log(f'Job state not found (job info follows):\n{info}')
                continue

            job._state = JOB_STATES[state_match.group('state')]

            nodelist_match = re.search(r'exec_host = (?P<nodespec>[\S\t\n]+)',
                                       info, re.MULTILINE)
            if nodelist_match:
                # The node specification may span lines: drop newlines/tabs
                nodespec = re.sub(r'[\n\t]*', '',
                                  nodelist_match.group('nodespec'))
                self._update_nodelist(job, nodespec)

            if job.state == 'COMPLETED':
                exitcode_match = re.search(
                    r'^\s*exit_status = (?P<code>\d+)', info, re.MULTILINE
                )
                if exitcode_match:
                    job._exitcode = int(exitcode_match.group('code'))

                # The job is reported as finished only when its output is
                # available (or it was cancelled)
                if job.cancelled or output_ready(job):
                    job._completed = True
            elif (job.state in ['QUEUED', 'HELD', 'WAITING']
                  and job.max_pending_time
                  and time.time() - job.submit_time >= job.max_pending_time):
                self.cancel(job)
                job._exception = JobError('maximum pending time exceeded',
                                          job.jobid)
Example #23
0
def test_command_error_cmd_seq():
    """A failing command given as a list raises when ``check=True``."""
    expected_msg = r"command 'false' failed with exit code 1"
    with pytest.raises(SpawnedProcessError, match=expected_msg):
        osext.run_command(['false'], check=True)