def available():
    ''' Check if Slurm is available on the system. '''
    if commons.which('sbatch'):
        return True
    return False
def submit(self, job, **kwargs):
    '''
    Submit a job with sbatch. Pass wrap=False to disable wrapping the
    command.

    :param job: Job object
    :type job: :class:`executors.models.Job`
    :param wrap: Wrap the command with ``sbatch --wrap`` (default True)
    :type wrap: bool
    '''
    if not commons.which('sbatch'):
        raise CommandNotFound('sbatch')
    cmd = [
        'sbatch',
        '--parsable',
        '--partition', self.partition
    ]
    cmd.extend(self.args)
    cmd.extend(self._arguments(job))
    wrap = kwargs.get('wrap', True)
    command = job.command
    if wrap:
        if isinstance(command, list):
            command = sp.list2cmdline(command)
        cmd.extend(['--wrap', command])
    else:
        if isinstance(command, six.string_types):
            command = shlex.split(command)
        cmd.extend(command)
    logger.debug(cmd)
    pid = sp.check_output(cmd).strip().decode()
    job.pid = pid
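# Illustrative sketch only (not part of the executor): it shows how the wrap and
# no-wrap branches above prepare job.command before it is handed to sbatch. The
# function name is hypothetical and it assumes a plain str or list command on
# Python 3.
def _example_prepare_command(command, wrap=True):
    import shlex
    import subprocess as sp
    if wrap:
        # list -> single shell string handed to `sbatch --wrap`
        if isinstance(command, list):
            command = sp.list2cmdline(command)
        return ['--wrap', command]
    # string -> argv tokens appended directly after the sbatch options
    if isinstance(command, str):
        command = shlex.split(command)
    return list(command)

# _example_prepare_command(['echo', 'hello world'])          -> ['--wrap', 'echo "hello world"']
# _example_prepare_command('echo "hello world"', wrap=False) -> ['echo', 'hello world']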
def _sacct_async(self, job_id):
    '''
    Run sacct command on a job and serialize output. This method is
    rate limited to 5 calls every 20 seconds.

    :param job_id: Slurm job ID
    :type job_id: str
    :returns: List of sacct rows
    :rtype: list
    '''
    # build the sacct command
    if not commons.which('sacct'):
        raise CommandNotFound('sacct')
    cmd = [
        'sacct',
        '--parsable2',
        '--delimiter', ',',
        '--brief',
        '--jobs', job_id
    ]
    # execute the sacct command, serialize, and return the result
    logger.debug(cmd)
    output = sp.check_output(cmd, universal_newlines=True).strip()
    output = csv.DictReader(io.StringIO(six.u(output)))
    return [row for row in output]
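# Illustrative sketch only: shows the shape of rows produced by the csv parsing
# above when sacct is run with --parsable2 --delimiter , --brief. The sample
# header below is an assumption about the local sacct output, not captured data.
def _example_parse_sacct(output):
    import csv
    import io
    reader = csv.DictReader(io.StringIO(output))
    return [row for row in reader]

# sample = 'JobID,State,ExitCode\n12345,COMPLETED,0:0\n'
# _example_parse_sacct(sample) -> roughly [{'JobID': '12345', 'State': 'COMPLETED', 'ExitCode': '0:0'}]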
def _cancel_async(self, job_id):
    if not commons.which('scancel'):
        raise CommandNotFound('scancel')
    cmd = [
        'scancel',
        job_id
    ]
    logger.debug(cmd)
    sp.check_output(cmd)
def _alter_logs(self, job):
    match = re.match(r'^(\d+)\.', job.pid)
    pid = match.group(1)
    qalter_args = list()
    if job.output and '%j' in job.output:
        output = job.output.replace('%j', pid)
        qalter_args.extend(['-o', os.path.expanduser(output)])
    if job.error and '%j' in job.error:
        error = job.error.replace('%j', pid)
        qalter_args.extend(['-e', os.path.expanduser(error)])
    if qalter_args:
        if not which('qalter'):
            raise CommandNotFound('qalter')
        cmd = ['qalter'] + qalter_args + [pid]
        sp.check_output(cmd)
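# Illustrative sketch only: shows the %j -> numeric job id substitution performed
# by _alter_logs above. The function name, path, and pid are made up.
def _example_substitute_jobid(path, pid):
    import os
    import re
    # keep only the leading numeric portion of a pid such as '12345.pbsnode01'
    match = re.match(r'^(\d+)\.', pid)
    jobid = match.group(1) if match else pid
    return os.path.expanduser(path.replace('%j', jobid))

# _example_substitute_jobid('~/logs/job-%j.out', '12345.pbsnode01')
#   -> e.g. '/home/you/logs/job-12345.out'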
def cancel(self, job, wait=False):
    if not which('bkill'):
        raise CommandNotFound('bkill')
    cmd = ['bkill', job.pid]
    try:
        logger.debug(cmd)
        sp.check_output(cmd, stderr=sp.PIPE)
    except sp.CalledProcessError as e:
        # bkill will return a 255 exit status if it tries to cancel a Job ID
        # that is already in a completed state or that is unknown to the
        # scheduler. We should pass on either of these states.
        if e.returncode == 255:
            logger.debug(
                'job %s is in a completed state or unknown and cannot be cancelled',
                job.pid)
        else:
            raise e
def submit(self, job):
    prefix = '{0}-%j'.format(job.name) if job.name else '%j'
    if not job.output:
        job.output = os.path.expanduser('~/{0}.out'.format(prefix))
    if not job.error:
        job.error = os.path.expanduser('~/{0}.err'.format(prefix))
    command = job.command
    if isinstance(command, list):
        command = sp.list2cmdline(command)
    if not which('bsub'):
        raise CommandNotFound('bsub')
    cmd = ['bsub', '-q', self.partition]
    cmd.extend(self._default_args)
    cmd.extend(self._arguments(job))
    cmd.append(command)
    logger.debug(sp.list2cmdline(cmd))
    output = sp.check_output(cmd, stderr=sp.STDOUT).strip().decode()
    pid = re.search(r'^Job <(\d+)>', output).group(1)
    logger.debug('parsed job id %s', pid)
    job.pid = pid
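# Illustrative sketch only: exercises the same regular expression used above to
# pull the job id out of the bsub submission banner. The sample text is an
# assumption about typical LSF output, not output captured from this code.
def _example_parse_bsub(output):
    import re
    match = re.search(r'^Job <(\d+)>', output)
    return match.group(1) if match else None

# _example_parse_bsub('Job <12345> is submitted to queue <normal>.') -> '12345'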
def submit(self, job):
    command = job.command
    if isinstance(command, list):
        command = sp.list2cmdline(command)
    if not which('pbsubmit'):
        raise CommandNotFound('pbsubmit')
    cmd = [
        'pbsubmit',
        '-q', self.partition
    ]
    cmd.extend(self._arguments(job))
    cmd.extend(['-c', command])
    logger.debug(sp.list2cmdline(cmd))
    output = sp.check_output(cmd, stderr=sp.STDOUT).decode('utf-8')
    output = output.strip().split('\n')
    # the last line of pbsubmit output is the PBS job id
    pid = output[-1]
    job.pid = pid
    # insert pid into stdout and stderr files
    self._alter_logs(job)
    # the first line of pbsubmit output names the pbsjob wrapper
    pbsjob = re.match(r'^Opening pbsjob_(\d+)', output[0]).group(1)
    job.pbsjob = pbsjob
def qstat(self, job):
    if not which('qstat'):
        raise CommandNotFound('qstat')
    cmd = [
        'qstat',
        '-x',
        '-f',
        job.pid
    ]
    logger.debug(cmd)
    try:
        output = sp.check_output(cmd)
    except sp.CalledProcessError as e:
        if e.returncode == 170:
            logger.debug('job %s already in completed state, falling back to jobinfo', job.pid)
            output = self.jobinfo(job)
        elif e.returncode == 153:
            logger.debug('job %s unknown to the scheduler, falling back to jobinfo', job.pid)
            output = self.jobinfo(job)
        else:
            raise e
    return et.fromstring(output.strip())
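# Illustrative sketch only: shows how a single field might be read from the XML
# that `qstat -x -f` returns. The element names below (Data, Job, job_state)
# follow common TORQUE output and are an assumption about the local scheduler.
def _example_job_state(xml_text):
    import xml.etree.ElementTree as et
    root = et.fromstring(xml_text)
    state = root.find('.//job_state')
    return state.text if state is not None else None

# _example_job_state('<Data><Job><job_state>C</job_state></Job></Data>') -> 'C'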
def bjobs(self, job):
    if not which('bjobs'):
        raise CommandNotFound('bjobs')
    cmd = ['bjobs', '-l', job.pid]
    logger.debug(cmd)
    output = sp.check_output(cmd).strip().decode()
    pid = re.match(r'Job <(\d+)>', output).group(1)
    job_state = re.search(r'Status <(\w+)>', output).group(1)
    exit_status = None
    if job_state in Executor.INACTIVE:
        exit_status = 0
    if job_state == 'EXIT':
        exit_status = re.search(r'Exited with exit code (\d+)\.', output).group(1)
    return {
        'pid': pid,
        'job_state': job_state,
        'exit_status': exit_status,
        'output_path': job.output,
        'error_path': job.error
    }
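# Illustrative sketch only: exercises the pid and status expressions used in
# bjobs() above against a made-up fragment of `bjobs -l` output.
def _example_parse_bjobs(output):
    import re
    pid = re.match(r'Job <(\d+)>', output).group(1)
    state = re.search(r'Status <(\w+)>', output).group(1)
    return pid, state

# _example_parse_bjobs('Job <98765>, User <someone>, Status <DONE>') -> ('98765', 'DONE')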
def cancel(self, job, wait=False):
    if not which('qdel'):
        raise CommandNotFound('qdel')
    cmd = [
        'qdel',
        job.pid
    ]
    try:
        logger.debug(cmd)
        sp.check_output(cmd, stderr=sp.PIPE)
    except sp.CalledProcessError as e:
        # qdel will return a 170 exit status if it tries to cancel a Job ID
        # that is already in a 'C' state, or a 153 exit status if the Job ID
        # is unknown. We should pass on either of these states. A Job ID can
        # become unknown only minutes after a job has entered the 'C' state.
        if e.returncode == 170:
            logger.debug('job %s is in a completed state and cannot be cancelled', job.pid)
        elif e.returncode == 153:
            logger.debug('job %s is unknown and cannot be cancelled', job.pid)
        else:
            raise e
def available():
    if which('bsub'):
        return True
    return False
def available():
    if which('pbsubmit'):
        return True
    return False