Ejemplo n.º 1
0
 def available():
     '''
     Report whether Slurm can be used on this system.

     :returns: True when ``commons.which`` locates ``sbatch``, else False
     :rtype: bool
     '''
     return bool(commons.which('sbatch'))
Ejemplo n.º 2
0
    def submit(self, job, **kwargs):
        '''
        Run sbatch for a job and store the ID it prints on ``job.pid``.

        By default the job command is handed to sbatch through ``--wrap``
        as a single command-line string. With ``wrap=False`` the command is
        appended to the sbatch invocation as individual arguments instead.

        :param job: Job object
        :type job: :mod:`executors.models.Job`
        :param wrap: Wrap the command with ``--wrap`` (default True)
        :type wrap: bool
        :raises CommandNotFound: If ``sbatch`` cannot be located
        '''
        if not commons.which('sbatch'):
            raise CommandNotFound('sbatch')
        argv = ['sbatch', '--parsable', '--partition', self.partition]
        argv.extend(self.args)
        argv.extend(self._arguments(job))
        use_wrap = kwargs.get('wrap', True)
        payload = job.command
        if not use_wrap:
            # unwrapped: the command becomes trailing sbatch arguments
            if isinstance(payload, six.string_types):
                payload = shlex.split(payload)
            argv.extend(payload)
        else:
            # wrapped: sbatch receives the command as one string
            if isinstance(payload, list):
                payload = sp.list2cmdline(payload)
            argv.extend(['--wrap', payload])
        logger.debug(argv)
        raw = sp.check_output(argv)
        job.pid = raw.strip().decode()
Ejemplo n.º 3
0
    def _sacct_async(self, job_id):
        '''
        Query sacct for one job and parse its comma-delimited output.

        NOTE: earlier documentation for this method states that it is rate
        limited to 5 calls every 20 seconds; no limiter appears in this
        method body, so that would have to be applied from outside it.

        :param job_id: Slurm job ID
        :type job_id: str
        :returns: List of sacct rows, one mapping per output row
        :rtype: list
        :raises CommandNotFound: If ``sacct`` cannot be located
        '''
        if not commons.which('sacct'):
            raise CommandNotFound('sacct')
        argv = ['sacct', '--parsable2', '--delimiter', ',', '--brief',
                '--jobs', job_id]
        logger.debug(argv)
        # text-mode output; the first line supplies the column names
        text = sp.check_output(argv, universal_newlines=True).strip()
        reader = csv.DictReader(io.StringIO(six.u(text)))
        return list(reader)
Ejemplo n.º 4
0
 def _cancel_async(self, job_id):
     '''
     Run scancel for the given job ID.

     :param job_id: Job ID passed to ``scancel`` as its only argument
     :raises CommandNotFound: If ``scancel`` cannot be located
     '''
     if not commons.which('scancel'):
         raise CommandNotFound('scancel')
     argv = ['scancel', job_id]
     logger.debug(argv)
     sp.check_output(argv)
Ejemplo n.º 5
0
 def _alter_logs(self, job):
     '''
     Substitute the numeric job ID for any ``%j`` placeholder in the job's
     output and error paths and apply the result with ``qalter``.

     ``qalter`` is only invoked when at least one of the two paths contains
     a ``%j`` placeholder. ``job.output`` and ``job.error`` themselves are
     not modified.

     :param job: Job object; ``pid``, ``output`` and ``error`` are read
     :raises ValueError: If ``job.pid`` does not start with ``<digits>.``
     :raises CommandNotFound: If ``qalter`` is needed but cannot be located
     '''
     # raw string: '\d' inside a plain literal is an invalid escape sequence
     match = re.match(r'^(\d+)\.', job.pid)
     if match is None:
         # fail with a readable message instead of AttributeError on None
         raise ValueError(
             'could not parse numeric job id from pid %r' % (job.pid,))
     pid = match.group(1)
     qalter_args = []
     for flag, path in (('-o', job.output), ('-e', job.error)):
         if path and '%j' in path:
             resolved = path.replace('%j', pid)
             qalter_args.extend([flag, os.path.expanduser(resolved)])
     if not qalter_args:
         return
     if not which('qalter'):
         raise CommandNotFound('qalter')
     sp.check_output(['qalter'] + qalter_args + [pid])
Ejemplo n.º 6
0
 def cancel(self, job, wait=False):
     '''
     Cancel a job with bkill.

     A 255 exit status from ``bkill`` is treated as "job already completed
     or unknown": it is logged at debug level and otherwise ignored. Any
     other non-zero exit status is re-raised to the caller.

     :param job: Job object; ``job.pid`` is passed to ``bkill``
     :param wait: Accepted for interface compatibility; not used here
     :type wait: bool
     :raises CommandNotFound: If ``bkill`` cannot be located
     :raises subprocess.CalledProcessError: If ``bkill`` fails with an exit
         status other than 255
     '''
     if not which('bkill'):
         raise CommandNotFound('bkill')
     cmd = ['bkill', job.pid]
     logger.debug(cmd)
     try:
         sp.check_output(cmd, stderr=sp.PIPE)
     except sp.CalledProcessError as e:
         # Only the 255 status is tolerated. Previously the exception was
         # re-raised unconditionally, so this case was never ignored.
         if e.returncode != 255:
             raise
         logger.debug(
             'job %s is in a completed state or unknown and cannot be cancelled',
             job.pid)
Ejemplo n.º 7
0
 def submit(self, job):
     '''
     Submit a job with bsub and store the parsed job ID on ``job.pid``.

     When the job has no output or error path, a default under the home
     directory is assigned to it: ``~/<name>-%j.out`` / ``~/<name>-%j.err``,
     or ``~/%j.out`` / ``~/%j.err`` when the job has no name. A list
     command is joined into a single command-line string before being
     handed to bsub.

     :param job: Job object; ``output``, ``error`` and ``pid`` may be set
     :raises CommandNotFound: If ``bsub`` cannot be located
     :raises subprocess.CalledProcessError: If ``bsub`` exits non-zero
     :raises ValueError: If no ``Job <N>`` line is found in bsub's output
     '''
     prefix = '{0}-%j'.format(job.name) if job.name else '%j'
     if not job.output:
         job.output = os.path.expanduser('~/{0}.out'.format(prefix))
     if not job.error:
         job.error = os.path.expanduser('~/{0}.err'.format(prefix))
     command = job.command
     if isinstance(command, list):
         command = sp.list2cmdline(command)
     if not which('bsub'):
         raise CommandNotFound('bsub')
     cmd = ['bsub', '-q', self.partition]
     cmd.extend(self._default_args)
     cmd.extend(self._arguments(job))
     cmd.append(command)
     logger.debug(sp.list2cmdline(cmd))
     output = sp.check_output(cmd, stderr=sp.STDOUT).strip().decode()
     # stderr is merged into stdout, so other text may precede the job
     # line; MULTILINE lets '^' anchor at the start of any line
     match = re.search(r'^Job <(\d+)>', output, re.MULTILINE)
     if match is None:
         raise ValueError(
             'could not parse job id from bsub output: %r' % (output,))
     pid = match.group(1)
     logger.debug('parsed job id %s', pid)
     job.pid = pid
Ejemplo n.º 8
0
 def submit(self, job):
     '''
     Submit a job with pbsubmit and record its identifiers on the job.

     The last line of pbsubmit's output is stored as ``job.pid``. The
     number following ``Opening pbsjob_`` on the first line is stored as
     ``job.pbsjob``. Between those two steps ``_alter_logs`` is called to
     insert the job ID into the stdout and stderr file paths.

     :param job: Job object; ``pid`` and ``pbsjob`` are set
     :raises CommandNotFound: If ``pbsubmit`` cannot be located
     :raises subprocess.CalledProcessError: If ``pbsubmit`` exits non-zero
     :raises ValueError: If the first output line does not start with
         ``Opening pbsjob_<digits>``
     '''
     command = job.command
     if isinstance(command, list):
         command = sp.list2cmdline(command)
     if not which('pbsubmit'):
         raise CommandNotFound('pbsubmit')
     cmd = [
         'pbsubmit',
         '-q', self.partition
     ]
     cmd.extend(self._arguments(job))
     cmd.extend([
         '-c', command
     ])
     logger.debug(sp.list2cmdline(cmd))
     output = sp.check_output(cmd, stderr=sp.STDOUT).decode('utf-8')
     lines = output.strip().split('\n')
     job.pid = lines[-1]
     self._alter_logs(job) # insert pid into stdout and stderr files
     # raw string: '\d' inside a plain literal is an invalid escape sequence
     match = re.match(r'^Opening pbsjob_(\d+)', lines[0])
     if match is None:
         raise ValueError(
             'could not parse pbsjob number from pbsubmit output: %r'
             % (lines[0],))
     job.pbsjob = match.group(1)
Ejemplo n.º 9
0
 def qstat(self, job):
     '''
     Run ``qstat -x -f`` for a job and parse the result as XML.

     When qstat exits with status 170 or 153, the output of
     ``self.jobinfo(job)`` is parsed instead. Any other failure is
     re-raised.

     :param job: Job object; ``job.pid`` is passed to ``qstat``
     :returns: Root element parsed from the command output
     :raises CommandNotFound: If ``qstat`` cannot be located
     :raises subprocess.CalledProcessError: If ``qstat`` fails with an exit
         status other than 170 or 153
     '''
     if not which('qstat'):
         raise CommandNotFound('qstat')
     argv = ['qstat', '-x', '-f', job.pid]
     logger.debug(argv)
     try:
         payload = sp.check_output(argv)
     except sp.CalledProcessError as err:
         status = err.returncode
         if status == 170:
             logger.debug('job %s already in completed state, falling back to jobinfo', job.pid)
         elif status == 153:
             logger.debug('job %s unknown to the scheduler, falling back to jobinfo', job.pid)
         else:
             raise err
         # both tolerated statuses share the same fallback source
         payload = self.jobinfo(job)
     return et.fromstring(payload.strip())
Ejemplo n.º 10
0
 def bjobs(self, job):
     '''
     Run ``bjobs -l`` for a job and extract its ID, state and exit status.

     ``exit_status`` is None while the state is not in
     ``Executor.INACTIVE``. For an inactive job it is the integer 0, except
     in the ``EXIT`` state where it is the exit code as parsed from the
     output (a string of digits).

     :param job: Job object; ``pid``, ``output`` and ``error`` are read
     :returns: Dict with keys ``pid``, ``job_state``, ``exit_status``,
         ``output_path`` and ``error_path``
     :rtype: dict
     :raises CommandNotFound: If ``bjobs`` cannot be located
     :raises subprocess.CalledProcessError: If ``bjobs`` exits non-zero
     :raises ValueError: If the expected fields cannot be found in the
         output
     '''
     if not which('bjobs'):
         raise CommandNotFound('bjobs')
     cmd = ['bjobs', '-l', job.pid]
     logger.debug(cmd)
     # a failure propagates as-is; the old try/except only re-raised it
     output = sp.check_output(cmd).strip().decode()
     # raw strings: '\d' and '\w' are invalid escapes in plain literals
     pid_match = re.match(r'Job <(\d+)>', output)
     state_match = re.search(r'Status <(\w+)>', output)
     if pid_match is None or state_match is None:
         raise ValueError(
             'could not parse bjobs output for job %s: %r'
             % (job.pid, output))
     pid = pid_match.group(1)
     job_state = state_match.group(1)
     exit_status = None
     if job_state in Executor.INACTIVE:
         exit_status = 0
         if job_state == 'EXIT':
             code_match = re.search(r'Exited with exit code (\d+).', output)
             if code_match is None:
                 raise ValueError(
                     'job %s is in EXIT state but no exit code was found '
                     'in bjobs output' % (job.pid,))
             exit_status = code_match.group(1)
     return {
         'pid': pid,
         'job_state': job_state,
         'exit_status': exit_status,
         'output_path': job.output,
         'error_path': job.error
     }
Ejemplo n.º 11
0
 def cancel(self, job, wait=False):
     '''
     Cancel a job with qdel.

     Exit statuses 153 and 170 from ``qdel`` are logged at debug level and
     otherwise ignored; any other failure is re-raised.

     :param job: Job object; ``job.pid`` is passed to ``qdel``
     :param wait: Accepted for interface compatibility; not used here
     :type wait: bool
     :raises CommandNotFound: If ``qdel`` cannot be located
     :raises subprocess.CalledProcessError: If ``qdel`` fails with an exit
         status other than 153 or 170
     '''
     if not which('qdel'):
         raise CommandNotFound('qdel')
     argv = ['qdel', job.pid]
     # Per the original notes: 153 is returned for a Job ID already in a
     # 'C' state and 170 for an unknown Job ID; an ID can become unknown
     # only minutes after the job has entered the 'C' state.
     tolerated = {
         153: 'job %s is in a completed state and cannot be cancelled',
         170: 'job %s is unknown and cannot be cancelled',
     }
     logger.debug(argv)
     try:
         sp.check_output(argv, stderr=sp.PIPE)
     except sp.CalledProcessError as err:
         if err.returncode not in tolerated:
             raise err
         logger.debug(tolerated[err.returncode], job.pid)
Ejemplo n.º 12
0
 def available():
     '''
     Report whether ``bsub`` can be located on this system.

     :returns: True when ``which`` finds ``bsub``, else False
     :rtype: bool
     '''
     return bool(which('bsub'))
Ejemplo n.º 13
0
 def available():
     '''
     Report whether ``pbsubmit`` can be located on this system.

     :returns: True when ``which`` finds ``pbsubmit``, else False
     :rtype: bool
     '''
     return bool(which('pbsubmit'))