コード例 #1
0
 def get_output(cmd):
     """Run ``cmd`` as a subprocess and return what it wrote to stdout.

     The process is opened with stderr redirected into stdout, so the
     returned value is the combined stream (assuming ``py3_compat.Popen``
     follows ``subprocess.Popen`` semantics -- TODO confirm).

     :type cmd: list
     :arg cmd: command to run, passed straight to ``py3_compat.Popen``

     :returns: captured output of the process
     """
     job = py3_compat.Popen(cmd,
                            stderr=subprocess.STDOUT,
                            stdout=subprocess.PIPE)
     logging.debug('Communicating with job process.')
     # Only the first element matters: stderr is folded into stdout above.
     captured = job.communicate()[0]
     return captured
コード例 #2
0
    def num_cpus(self):
        """Returns the number of cpus that qsub should reserve. PBSPro requires
        the cpu reservation be given to both qsub, and aprun.

        If the CHPL_LAUNCHCMD_NUM_CPUS environment variable is set, its value
        is used and cnselect is not run. Otherwise the first line printed by
        ``cnselect -Lnumcores`` is used.

        :rtype: int
        :returns: Number of cpus to reserve

        :raises RuntimeError: if cnselect is not callable (OSError on launch)
        :raises ValueError: if cnselect had no output, or if the value found
            is not an integer
        """
        n_cpus = os.environ.get('CHPL_LAUNCHCMD_NUM_CPUS')
        if n_cpus is not None:
            # Environment values are always strings; convert so that both
            # code paths return an int as documented.
            return int(n_cpus)

        logging.debug('Checking for number of cpus to reserve.')
        try:
            cnselect_proc = py3_compat.Popen(['cnselect', '-Lnumcores'],
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.STDOUT)

            logging.debug('Communicating with cnselect process.')
            # stderr is merged into stdout above, so only stdout is needed.
            stdout, _ = cnselect_proc.communicate()
        except OSError as ex:
            raise RuntimeError(ex)

        first_line = stdout.split('\n')[0]
        if not first_line:
            msg = 'cnselect -Lnumcores had no output.'
            logging.error(msg)
            raise ValueError(msg)
        return int(first_line)
コード例 #3
0
ファイル: chpl_launchcmd.py プロジェクト: ravikr126/chapel
    def _launch_qsub(self, testing_dir, output_file, error_file):
        """Launch job using qsub and return job id. Raises RuntimeError if
        self.submit_bin is anything but qsub.

        :type testing_dir: str
        :arg testing_dir: working directory for running test

        :type output_file: str
        :arg output_file: stdout log filename

        :type error_file: str
        :arg error_file: stderr log filename

        :rtype: str
        :returns: job id
        """
        if self.submit_bin != 'qsub':
            raise RuntimeError('_launch_qsub called for non-pbs job type!')

        # Quiet information from LMOD about module changes that would show up
        # in our test output
        logging.info('Setting LMOD_QUIET=1')
        os.environ["LMOD_QUIET"] = "1"

        logging.info('Starting {0} job "{1}" on {2} nodes with walltime {3} '
                     'and output file: {4}'.format(self.submit_bin,
                                                   self.job_name,
                                                   self.num_locales,
                                                   self.walltime, output_file))

        logging.debug('Opening {0} subprocess.'.format(self.submit_bin))
        submit_proc = py3_compat.Popen(self._qsub_command(
            output_file, error_file),
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       cwd=testing_dir,
                                       env=os.environ.copy())

        test_command_str = ' '.join(
            self.full_test_command(output_file, error_file))
        logging.debug(
            'Communicating with {0} subprocess. Sending test command on stdin: {1}'
            .format(self.submit_bin, test_command_str))
        stdout, stderr = submit_proc.communicate(input=test_command_str)
        logging.debug(
            '{0} process returned with status {1}, stdout: {2} stderr: {3}'.
            format(self.submit_bin, submit_proc.returncode, stdout, stderr))

        if submit_proc.returncode != 0:
            msg = '{0} failed with exit code {1} and output: {2}'.format(
                self.submit_bin, submit_proc.returncode, stdout)
            logging.error(msg)
            raise ValueError(msg)

        job_id = stdout.strip()
        return job_id
コード例 #4
0
    def _qstat(cls, job_id, args=None):
        """Run qstat for a job and return what it printed.

        The command line is ``qstat`` followed by any extra ``args`` and then
        the job id. stderr is redirected into stdout when the process is
        opened.

        :type job_id: str
        :arg job_id: pbs job id

        :type args: list
        :arg args: additional arguments to pass qstat

        :rtype: str
        :returns: qsub job status

        :raises ValueError: if the qstat exit code is non-zero
        """
        extra_args = [] if args is None else args
        qstat_command = ['qstat'] + extra_args + [job_id]
        logging.debug('qstat command to run: {0}'.format(qstat_command))

        logging.debug('Opening qstat subprocess.')
        child_env = os.environ.copy()
        qstat_proc = py3_compat.Popen(qstat_command,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      env=child_env)

        logging.debug('Communicating with qstat subprocess.')
        stdout, stderr = qstat_proc.communicate()
        exit_status = qstat_proc.returncode
        logging.debug(
            'qstat process returned with status {0}, stdout: {1}, and stderr: {2}'
            .format(exit_status, stdout, stderr))

        if exit_status == 0:
            return stdout

        raise ValueError('Non-zero exit code {0} from qstat: "{1}"'.format(
            exit_status, stdout))
コード例 #5
0
    def status(cls, job_id):
        """Query job status using squeue and map it to a one-letter code.

        squeue is asked for a headerless "<job_id> <status>" line covering
        all job states. The reported state is translated as follows:
        ``'C'`` for COMPLETED and for every failure state, ``'Q'`` for
        CONFIGURING/PENDING, and ``'R'`` for anything else.

        :type job_id: str
        :arg job_id: squeue job id

        :rtype: str
        :returns: squeue job status

        :raises ValueError: if squeue exits non-zero, or if its output does
            not split into exactly two space-separated fields
        """
        squeue_command = ['squeue', '--noheader']
        squeue_command += ['--format', '%A %T']  # "<job_id> <status>"
        squeue_command += ['--states', 'all']
        squeue_command += ['--job', job_id]
        logging.debug('squeue command to run: {0}'.format(squeue_command))

        logging.debug('Opening squeue subprocess.')
        child_env = os.environ.copy()
        squeue_proc = py3_compat.Popen(squeue_command,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       env=child_env)

        logging.debug('Communicating with squeue subprocess.')
        stdout, stderr = squeue_proc.communicate()
        exit_status = squeue_proc.returncode
        logging.debug(
            'squeue process returned with status {0}, stdout: {1}, stderr: {2}'
            .format(exit_status, stdout, stderr))

        if exit_status != 0:
            raise ValueError(
                'Non-zero exit code {0} from squeue: "{1}"'.format(
                    exit_status, stdout))

        fields = stdout.split(' ')
        if len(fields) != 2:
            raise ValueError(
                'Could not parse output from squeue: {0}'.format(stdout))

        status = fields[1].strip()
        logging.info('Status for job {0} is: {1}'.format(job_id, status))

        # Successful and failed jobs are both reported as finished ('C').
        finished_statuses = ('COMPLETED', 'CANCELLED', 'FAILED', 'TIMEOUT',
                             'BOOT_FAIL', 'NODE_FAIL', 'PREEMPTED')
        queued_statuses = ('CONFIGURING', 'PENDING')

        if status in finished_statuses:
            logging.info('Job finished with status: {0}'.format(status))
            return 'C'
        if status in queued_statuses:
            return 'Q'
        return 'R'  # running
0
    def submit_job(self, testing_dir, output_file, error_file, input_file):
        """Launch job using executable. Set CHPL_LAUNCHER_USE_SBATCH=true in
        environment to avoid using expect script. The executable will create a
        sbatch script and submit it. Parse and return the job id after job is
        submitted.

        :type testing_dir: str
        :arg testing_dir: working directory for running test

        :type output_file: str
        :arg output_file: stdout log filename

        :type error_file: str
        :arg error_file: stderr log filename

        :type input_file: str
        :arg input_file: file that pending stdin data is written to; when
            used, its name is exported as SLURM_STDINMODE

        :rtype: str
        :returns: job id

        :raises ValueError: if the submission exits with a non-zero status or
            its output cannot be parsed for a job id
        """
        env = os.environ.copy()
        env['CHPL_LAUNCHER_USE_SBATCH'] = 'true'
        env['CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME'] = output_file
        env['CHPL_LAUNCHER_SLURM_ERROR_FILENAME'] = error_file

        # Zero-timeout poll: only capture stdin when data is already waiting,
        # so this never blocks on an interactive terminal.
        if select.select([
                sys.stdin,
        ], [], [], 0.0)[0]:
            with open(input_file, 'w') as fp:
                fp.write(sys.stdin.read())
            env['SLURM_STDINMODE'] = input_file

        # We could use stdout buffering for other configurations too, but I
        # don't think there's any need. Currently, single locale perf testing
        # is the only config that has any tests that produce a lot of output
        if os.getenv('CHPL_TEST_PERF') is not None and self.num_locales <= 1:
            env['CHPL_LAUNCHER_SLURM_BUFFER_STDOUT'] = 'true'

        # Work on a copy so the stored test command is not modified.
        cmd = self.test_command[:]
        # Add --nodelist into the command line
        if self.hostlist is not None:
            cmd.append('--{0}={1}'.format(self.hostlist_resource,
                                          self.hostlist))

        # Add --walltime back into the command line.
        if self.walltime is not None:
            cmd.append('--walltime')
            cmd.append(self.walltime)

        logging.debug('Command to submit job: {0}'.format(cmd))

        logging.debug('Opening job subprocess')
        submit_proc = py3_compat.Popen(cmd,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       cwd=testing_dir,
                                       env=env)

        logging.debug('Communicating with job subprocess')
        stdout, stderr = submit_proc.communicate()
        logging.debug(
            'Job process returned with status {0}, stdout: {1}, stderr: {2}'.
            format(submit_proc.returncode, stdout, stderr))

        if submit_proc.returncode != 0:
            msg = 'Job submission ({0}) failed with exit code {1} and output: {2}'.format(
                cmd, submit_proc.returncode, stdout)
            logging.error(msg)
            raise ValueError(msg)

        # Output is: Submitted batch job 106001
        id_parts = stdout.split(' ')
        if len(id_parts) < 4:
            raise ValueError(
                'Could not parse output from sbatch submission: {0}'.format(
                    stdout))

        # The job id is the fourth space-separated field.
        job_id = id_parts[3].strip()
        return job_id