Esempio n. 1
0
    def get_usage():
        """Summarise cluster-wide and user resource usage.

        Runs ``CMD_INFO`` and accumulates, over every reported line, the
        total/allocated cores and memory; then runs ``CMD_SQUEUE`` and
        accumulates the cores and memory listed there (presumably the
        resources of the current user's jobs; verify against ``CMD_SQUEUE``).

        Returns:
            dict: ``nodes`` (count of nodes whose state is mix, idle or
            alloc), ``usage`` and ``user`` (both computed by
            ``get_usage_percentage`` against the cluster totals).

        Raises:
            ExecutorError: if either command fails with ``QMapError``.
        """
        working_states = ('mix', 'idle', 'alloc')

        total_cores = 0
        total_mem = 0
        used_cores = 0
        used_mem = 0
        working_nodes = 0

        try:
            info_out = execute_command(CMD_INFO)
        except QMapError as e:
            raise ExecutorError(e)

        for row in info_out.splitlines():
            fields = row.strip().split()
            # Second column is a '/'-separated core summary; index 0 is
            # counted as allocated and index 3 as the total.
            core_counts = fields[1].split('/')
            total_cores += int(core_counts[3])
            used_cores += int(core_counts[0])
            # NOTE(review): the // 1024 looks like a MB -> GB conversion;
            # confirm against the format requested in CMD_INFO.
            total_mem += int(fields[2]) // 1024
            used_mem += int(fields[3]) // 1024
            # Only nodes in a working state count towards the node total.
            # NOTE(review): their cores/memory are still added above even
            # when the node is not working; confirm this is intended.
            if fields[4] in working_states:
                working_nodes += 1

        summary = {
            'nodes': working_nodes,
            'usage': get_usage_percentage(used_cores, used_mem,
                                          total_cores, total_mem),
        }

        try:
            queue_out = execute_command(CMD_SQUEUE)
        except QMapError as e:
            raise ExecutorError(e)

        user_cores = 0
        user_mem = 0
        for row in queue_out.splitlines():
            fields = row.strip().split()
            user_cores += int(fields[0])
            # Memory is a number followed by a one-letter unit suffix.
            requested = fields[1]
            unit = requested[-1]
            amount = int(float(requested[:-1]))
            user_mem += memory_convert(amount, unit, 'G')

        summary['user'] = get_usage_percentage(user_cores, user_mem,
                                               total_cores, total_mem)
        return summary
Esempio n. 2
0
File: lsf.py Progetto: bbglab/qmap
 def generate_job_status_running(job_ids, retries=3):
     """Yield the status of the jobs in *job_ids* that ``bjobs`` reports.

     Args:
         job_ids: mutable collection of job ID strings. Every ID found in
             the ``bjobs`` output is removed from it once the generator is
             exhausted.
         retries: how many more times to retry when the command fails.

     Yields:
         tuple: ``(job_id, (status, error, info))`` where ``info`` holds
         the cluster name and, when reported, the job ``start_time``.

     Raises:
         ExecutorError: if ``bjobs`` keeps failing after all retries.
     """
     done = set()
     cmd = 'bjobs -noheader -o "jobid stat start_time"'
     try:
         out = execute_command(cmd)
     except QMapError as e:
         if retries > 0:
             time.sleep(0.5)
             yield from Executor.generate_job_status_running(
                 job_ids=job_ids, retries=retries - 1)
         else:
             raise ExecutorError(e) from None
     else:
         for line in out.splitlines():
             fields = line.strip().split(maxsplit=2)
             if len(fields) < 2:
                 # Blank or truncated row: nothing usable to report.
                 continue
             id_, state = fields[0], fields[1]
             if id_ not in job_ids:
                 continue
             status = LSF_STATUS.get(state, Status.OTHER)
             if status == Status.FAILED:
                 error = ExecutorErrorCodes.UNKNOWN
             else:
                 error = ExecutorErrorCodes.NOERROR
             done.add(id_)
             # Build a fresh dict per job so one job's start_time can never
             # leak into the info of another job.
             info = {'usage': {'cluster': 'LSF'}}
             start_time = fields[2].strip() if len(fields) > 2 else ''
             if start_time:
                 info['start_time'] = start_time
             yield id_, (status, error, info)
     # Jobs that were reported no longer need to be looked up by the caller.
     for id_ in done:
         job_ids.remove(id_)
Esempio n. 3
0
File: sge.py Progetto: bbglab/qmap
 def generate_job_status_running(job_ids, retries=3):
     """Yield the status of the jobs in *job_ids* that ``qstat`` lists.

     Args:
         job_ids: mutable collection of job ID strings; IDs found in the
             ``qstat`` output are removed once the generator is exhausted.
         retries: how many more times to retry when the command fails.

     Yields:
         tuple: ``(job_id, (status, error, info))``; the same ``info``
         dict object is shared by every yielded item.

     Raises:
         ExecutorError: if ``qstat`` keeps failing after all retries.
     """
     reported = set()
     try:
         qstat_out = execute_command("qstat")
     except QMapError as e:
         if retries > 0:
             time.sleep(0.5)
             yield from Executor.generate_job_status_running(
                 job_ids=job_ids, retries=retries - 1)
         else:
             raise ExecutorError(e) from None
     else:
         shared_info = {'usage': {'cluster': 'SGE'}}
         # Skip the first two lines (presumably qstat's header and ruler).
         for row in qstat_out.splitlines()[2:]:
             columns = row.strip().split()
             job = columns[0]
             if job not in job_ids:
                 continue
             # The fifth column holds the state code.
             status = QSTAT_STATUS.get(columns[4], Status.OTHER)
             if status == Status.FAILED:
                 error = ExecutorErrorCodes.UNKNOWN
             else:
                 error = ExecutorErrorCodes.NOERROR
             reported.add(job)
             yield job, (status, error, shared_info)
     for job in reported:
         job_ids.remove(job)
Esempio n. 4
0
File: lsf.py Progetto: bbglab/qmap
 def terminate_jobs(job_ids):
     """Kill the given jobs with ``bkill``.

     Args:
         job_ids: sized collection of job ID strings.

     Returns:
         tuple: ``(stripped command output, command string)``. When
         *job_ids* is empty nothing is executed and the output is ``''``.

     Raises:
         ExecutorError: if the command fails with ``QMapError``.
     """
     cmd = "bkill {}".format(" ".join(job_ids))
     nothing_to_kill = len(job_ids) == 0
     if nothing_to_kill:
         return '', cmd
     try:
         kill_output = execute_command(cmd)
     except QMapError as e:
         raise ExecutorError(e)
     return kill_output.strip(), cmd
Esempio n. 5
0
File: sge.py Progetto: bbglab/qmap
 def run_job(f_script, parameters, out=None, err=None):
     """Submit a script with ``qsub``.

     Args:
         f_script: script path without its extension;
             ``SCRIPT_FILE_EXTENSION`` is appended.
         parameters: job parameters, turned into command-line options by
             ``parse_parameters``.
         out: optional file to which STDOUT will be written.
         err: optional file to which STDERR will be written.

     Returns:
         tuple: ``(stripped qsub output, command string)``.

     Raises:
         ExecutorError: if the submission fails with ``QMapError``.
     """
     qsub_options = parse_parameters(parameters)
     if out is not None:
         qsub_options.append('-o {}'.format(out))
     if err is not None:
         qsub_options.append('-e {}'.format(err))
     joined_options = ' '.join(qsub_options)
     cmd = "qsub -terse -r no {} {}.{}".format(joined_options, f_script,
                                               SCRIPT_FILE_EXTENSION)
     try:
         submission = execute_command(cmd)
     except QMapError:
         message = 'Job cannot be submitted to SGE. Command: {}'.format(cmd)
         raise ExecutorError(message)
     return submission.strip(), cmd
Esempio n. 6
0
File: lsf.py Progetto: bbglab/qmap
 def run_job(f_script, parameters, out=None, err=None):
     """Submit a script with ``bsub`` and return the new job ID.

     Args:
         f_script: script path without its extension;
             ``SCRIPT_FILE_EXTENSION`` is appended.
         parameters: job parameters, turned into command-line options by
             ``parse_parameters``.
         out: optional file to which STDOUT will be written.
         err: optional file to which STDERR will be written.

     Returns:
         tuple: ``(job_id, command string)``.

     Raises:
         ExecutorError: if the submission fails or the job ID cannot be
             read from the ``bsub`` output.
     """
     options = parse_parameters(parameters)
     if out is not None:
         options.append(
             '-o {}'.format(out))  # File to which STDOUT will be written
     if err is not None:
         options.append(
             '-e {}'.format(err))  # File to which STDERR will be written
     cmd = "bsub {} {}.{}".format(' '.join(options), f_script,
                                  SCRIPT_FILE_EXTENSION)  # -rn
     try:
         submission = execute_command(cmd)
     except QMapError:
         raise ExecutorError(
             'Job cannot be submitted to LSF. Command: {}'.format(cmd))
     # The job ID is taken from the second whitespace-separated token with
     # its angle brackets removed (assumes output like "Job <123> is ...").
     tokens = submission.strip().split()
     try:
         job_id = tokens[1].replace('<', '').replace('>', '')
     except IndexError:
         raise ExecutorError(
             'Cannot read the job ID from the bsub output. '
             'Command: {}'.format(cmd)) from None
     return job_id, cmd
Esempio n. 7
0
File: sge.py Progetto: bbglab/qmap
    def generate_job_status_finished(job_ids, retries=3):
        """Yield the final status of jobs as reported by ``qacct``.

        The output is read as a sequence of records delimited by lines
        starting with ``==``; each record is a list of ``key value`` lines.

        Args:
            job_ids: iterable of job ID strings, joined with commas.
            retries: how many more times to retry when the command fails.

        Yields:
            tuple: ``(job_id, (status, error, info))``. ``info`` holds the
            cluster type plus, when present, the ``maxvmem`` and
            ``hostname`` values and the non-zero ``exit_code``.

        Raises:
            ExecutorError: if ``qacct`` keeps failing after all retries.
        """
        cmd = "qacct -j {}".format(','.join(job_ids))
        try:
            out = execute_command(cmd)
        except QMapError as e:
            if retries > 0:
                time.sleep(0.5)
                yield from Executor.generate_job_status_finished(
                    job_ids=job_ids, retries=retries - 1)
            else:
                raise ExecutorError(e) from None
        else:
            id_ = None
            status = Status.OTHER
            error = None
            # Start with the full structure so the first record can store
            # its memory/hostname values like every later record.
            info = {'usage': {'cluster': {'type': 'SGE'}}}
            for line in out.splitlines():
                if not line.strip():
                    continue
                if line.startswith('=='):
                    # Record separator: flush the record read so far (if
                    # any) and start a clean one. This also covers the
                    # separator that precedes the very first record.
                    if id_ is not None:
                        yield id_, (status, error, info)
                    id_ = None
                    status = Status.OTHER
                    error = None
                    info = {'usage': {'cluster': {'type': 'SGE'}}}
                    continue
                fields = line.strip().split(maxsplit=1)
                if len(fields) != 2:
                    # A key without a value carries no information.
                    continue
                k, v = fields
                if k == 'jobnumber':
                    id_ = v
                elif k == 'exit_status':
                    if v == '0':
                        error = ExecutorErrorCodes.NOERROR
                        status = Status.DONE
                    else:
                        error = ExecutorErrorCodes.UNKNOWN
                        status = Status.FAILED
                        info['exit_code'] = v
                elif k == 'maxvmem':
                    info['usage']['memory'] = v
                elif k == 'hostname':
                    info['usage']['cluster']['nodes'] = v
            # The last record is not followed by a separator.
            if id_ is not None:
                yield id_, (status, error, info)
Esempio n. 8
0
    def generate_jobs_status(job_ids, retries=3):
        """Yield the status of each job as reported by ``sacct``.

        For each job ID, we assume we have a single step (.0 for run and
        .batch for batch submissions). Rows are grouped by the part of
        ``JobID`` before the first dot, and the last row of each group is
        the one handed to ``parse_status``.

        Args:
            job_ids: iterable of job ID strings, joined with commas.
            retries: how many more times to retry when the command fails.

        Yields:
            tuple: ``(job_id, parse_status(row))``, where ``row`` is the
            last row of that job with its ``JobID`` field removed.

        Raises:
            ExecutorError: if ``sacct`` keeps failing after all retries.
        """
        requested = ",".join(job_ids)
        cmd = "sacct --parsable2 --format {} --jobs {}".format(
            STATUS_FORMAT, requested)
        try:
            out = execute_command(cmd)
        except QMapError as e:
            if retries > 0:
                time.sleep(0.5)
                yield from Executor.generate_jobs_status(job_ids=job_ids,
                                                         retries=retries - 1)
            else:
                raise ExecutorError(e) from None
        else:
            current_job = None
            latest_row = None
            for row in csv.DictReader(out.splitlines(), delimiter='|'):
                step_id = row.pop('JobID')
                job = step_id.split('.')[0]
                if current_job is not None and job != current_job:
                    # A new job starts: report the previous one using the
                    # last row that was seen for it.
                    yield current_job, parse_status(latest_row)
                current_job = job
                latest_row = row
            # Report the job that was still being collected at the end.
            if current_job is not None:
                yield current_job, parse_status(latest_row)
Esempio n. 9
0
File: lsf.py Progetto: bbglab/qmap
 def generate_job_status_finished(job_ids, retries=3):
     """Yield the final status of jobs as reported by ``bacct -l``.

     The output is read as records delimited by lines starting with
     ``----``; the job ID and status are taken from the ``Job <...>`` line
     of each record using the module-level ``regex``.

     Args:
         job_ids: iterable of job ID strings, joined with spaces.
         retries: how many more times to retry when the command fails.

     Yields:
         tuple: ``(job_id, (status, error, info))``. ``error`` is always
         ``None`` here and ``info`` only holds the cluster type.

     Raises:
         ExecutorError: if ``bacct`` keeps failing after all retries.
     """
     cmd = "bacct -l {}".format(' '.join(job_ids))
     try:
         out = execute_command(cmd)
     except QMapError as e:
         if retries > 0:
             time.sleep(0.5)
             yield from Executor.generate_job_status_finished(
                 job_ids=job_ids, retries=retries - 1)
         else:
             raise ExecutorError(e) from None
     else:
         id_ = None
         status = Status.OTHER
         error = None
         # Same structure for the first record as for every later one.
         info = {'usage': {'cluster': {'type': 'LSF'}}}
         for line in out.splitlines():
             if not line:
                 continue
             if line.startswith('----'):
                 # Record separator: flush the record read so far and reset
                 # everything, including the ID, so that a trailing
                 # separator cannot make the same job be reported twice.
                 if id_ is not None:
                     yield id_, (status, error, info)
                 id_ = None
                 status = Status.OTHER
                 error = None
                 info = {'usage': {'cluster': {'type': 'LSF'}}}
             elif line.startswith('Job <'):
                 for match in regex.finditer(line):
                     name, value = match.groups()
                     if name == 'Job':
                         id_ = value
                     elif name == 'Status':
                         # Unknown states map to OTHER, as in the
                         # running-jobs query, instead of raising KeyError.
                         status = LSF_STATUS.get(value, Status.OTHER)
                     # TODO more info??
         # Report a record that was not closed by a separator.
         if id_ is not None:
             yield id_, (status, error, info)