def parse_sacct(job_info, log=None):
    """
    Parse one raw ``sacct`` record into a dict with numeric accounting fields.

    The input mapping is left untouched; a copy is returned with these keys
    added: ``exit_status``, ``cpu_time``, ``wall_time``, ``percent_cpu``,
    ``avg_rss_mem``, ``max_rss_mem``, ``avg_vms_mem`` and ``max_vms_mem``.

    :param job_info: mapping of sacct column names to raw string values. It
        is read for ``State``, ``ExitCode``, ``CPUTimeRAW``, ``Elapsed``,
        ``AveRSS``, ``MaxRSS``, ``AveVMSize`` and ``MaxVMSize``.
    :param log: optional logger. When given, the record being parsed is
        reported through ``log.info`` before a parsing error is re-raised.
    :returns: the augmented copy of ``job_info``.
    :raises Exception: any error raised while parsing is propagated unchanged.
    """
    # Bind the name before the ``try`` so the error handler always has
    # something to report, even when ``job_info.copy()`` is what failed.
    # Previously that case raised UnboundLocalError and hid the real error.
    job_info2 = job_info
    try:
        job_info2 = job_info.copy()

        if job_info2['State'] in FAILED_STATES + PENDING_STATES:
            # No exit code is reported for failed or pending jobs.
            job_info2['exit_status'] = None
        else:
            # ExitCode is split on ":" and only the first part is kept.
            job_info2['exit_status'] = int(job_info2['ExitCode'].split(":")[0])

        job_info2['cpu_time'] = int(job_info2['CPUTimeRAW'])
        job_info2['wall_time'] = parse_slurm_time(job_info2['Elapsed'])
        job_info2['percent_cpu'] = div(float(job_info2['cpu_time']),
                                       float(job_info2['wall_time']))

        # An empty string means sacct reported no value for that field.
        job_info2['avg_rss_mem'] = (convert_size_to_kb(job_info2['AveRSS'])
                                    if job_info2['AveRSS'] != '' else None)
        job_info2['max_rss_mem'] = (convert_size_to_kb(job_info2['MaxRSS'])
                                    if job_info2['MaxRSS'] != '' else None)
        job_info2['avg_vms_mem'] = (convert_size_to_kb(job_info2['AveVMSize'])
                                    if job_info2['AveVMSize'] != '' else None)
        job_info2['max_vms_mem'] = (convert_size_to_kb(job_info2['MaxVMSize'])
                                    if job_info2['MaxVMSize'] != '' else None)
    except Exception:
        if log:
            log.info('Error Parsing: %s' % pformat(job_info2))
        # Bare ``raise`` re-raises the active exception with its traceback.
        raise

    return job_info2
def parse_sacct(job_info, log=None):
    """
    Parse one raw ``sacct`` record into a dict with numeric accounting fields.

    A copy of ``job_info`` is returned with ``exit_status``, ``cpu_time``,
    ``wall_time``, ``percent_cpu``, ``avg_rss_mem``, ``max_rss_mem``,
    ``avg_vms_mem`` and ``max_vms_mem`` added; the input is not modified.

    :param job_info: mapping of sacct column names to raw string values. It
        is read for ``State``, ``ExitCode``, ``CPUTimeRAW``, ``Elapsed``,
        ``AveRSS``, ``MaxRSS``, ``AveVMSize`` and ``MaxVMSize``.
    :param log: optional logger. When given, the record being parsed is
        reported through ``log.info`` before a parsing error is re-raised.
    :returns: the augmented copy of ``job_info``.
    :raises Exception: any error raised while parsing is propagated unchanged.
    """
    # Pre-bind the name the error handler reports. Without this, a failure
    # in ``job_info.copy()`` made the handler raise UnboundLocalError,
    # which replaced the real exception.
    job_info2 = job_info
    try:
        job_info2 = job_info.copy()

        if job_info2["State"] in FAILED_STATES + PENDING_STATES:
            # No exit code is reported for failed or pending jobs.
            job_info2["exit_status"] = None
        else:
            # ExitCode is split on ":" and only the first part is kept.
            exit_code = job_info2["ExitCode"].split(":")[0]
            job_info2["exit_status"] = int(exit_code)

        job_info2["cpu_time"] = int(job_info2["CPUTimeRAW"])
        job_info2["wall_time"] = parse_slurm_time(job_info2["Elapsed"])
        job_info2["percent_cpu"] = div(
            float(job_info2["cpu_time"]), float(job_info2["wall_time"])
        )

        # An empty string means sacct reported no value for that field.
        job_info2["avg_rss_mem"] = (
            convert_size_to_kb(job_info2["AveRSS"])
            if job_info2["AveRSS"] != "" else None
        )
        job_info2["max_rss_mem"] = (
            convert_size_to_kb(job_info2["MaxRSS"])
            if job_info2["MaxRSS"] != "" else None
        )
        job_info2["avg_vms_mem"] = (
            convert_size_to_kb(job_info2["AveVMSize"])
            if job_info2["AveVMSize"] != "" else None
        )
        job_info2["max_vms_mem"] = (
            convert_size_to_kb(job_info2["MaxVMSize"])
            if job_info2["MaxVMSize"] != "" else None
        )
    except Exception:
        if log:
            log.info("Error Parsing: %s" % pformat(job_info2))
        # Bare ``raise`` re-raises the active exception with its traceback.
        raise

    return job_info2
def parse_drmaa_jobinfo(drmaa_jobinfo):
    """
    Convert a DRMAA job-info mapping into Cosmos's job-metadata dictionary.

    :param drmaa_jobinfo: mapping that must contain ``resourceUsage`` (itself
        a mapping of usage values). ``exitStatus`` is optional. ``hasSignal``,
        ``wasAborted`` and ``hasExited`` are read only while the exit status
        still looks like success.
    :returns: dict of job metadata. ``successful`` is False when the exit
        status is non-zero, or when the job was signalled, aborted, or did
        not exit; in that case ``exit_status`` is guaranteed to be non-zero.
    """
    d = drmaa_jobinfo['resourceUsage']
    cosmos_jobinfo = dict(
        exit_status=int(drmaa_jobinfo.get('exitStatus', os.EX_UNAVAILABLE)),

        percent_cpu=div(float(d.get('cpu', 0)), float(d.get('ru_wallclock', 0))),
        wall_time=float(d.get('ru_wallclock', 0)),

        cpu_time=float(d.get('cpu', 0)),
        user_time=float(d.get('ru_utime', 0)),
        system_time=float(d.get('ru_stime', 0)),

        # TODO should we be calling convert_size_to_kb() for avg_rss_mem?
        avg_rss_mem=d.get('ru_ixrss', "0"),
        max_rss_mem_kb=convert_size_to_kb(d.get('ru_maxrss', "0")),
        avg_vms_mem_kb=None,
        max_vms_mem_kb=convert_size_to_kb(d.get('maxvmem', "0")),

        io_read_count=int(float(d.get('ru_inblock', 0))),
        io_write_count=int(float(d.get('ru_oublock', 0))),
        io_wait=float(d.get('iow', 0)),
        # NOTE(review): read and write sizes both come from the single 'io'
        # value -- confirm this is intended.
        io_read_kb=float(d.get('io', 0)),
        io_write_kb=float(d.get('io', 0)),

        ctx_switch_voluntary=int(float(d.get('ru_nvcsw', 0))),
        ctx_switch_involuntary=int(float(d.get('ru_nivcsw', 0))),

        avg_num_threads=None,
        max_num_threads=None,

        avg_num_fds=None,
        max_num_fds=None,

        memory=float(d.get('mem', 0)),
    )

    #
    # Wait, what? drmaa has two exit status fields? Of course, they don't always
    # agree when an error occurs. Worse, sometimes drmaa doesn't set exit_status
    # when a job is killed. We may not be able to get the exact exit code, but
    # at least we can guarantee it will be non-zero for any job that shows signs
    # of terminating in error.
    #
    # The check reuses the exit status parsed above instead of indexing
    # drmaa_jobinfo['exitStatus'] again: the parsed value already falls back
    # to EX_UNAVAILABLE when the key is missing, so a record without
    # 'exitStatus' is reported as a failure rather than raising KeyError.
    #
    if cosmos_jobinfo['exit_status'] != 0 or \
            drmaa_jobinfo['hasSignal'] or \
            drmaa_jobinfo['wasAborted'] or \
            not drmaa_jobinfo['hasExited']:

        if cosmos_jobinfo['exit_status'] == 0:
            # The primary field claims success; try the second field.
            try:
                cosmos_jobinfo['exit_status'] = int(
                    float(drmaa_jobinfo['resourceUsage']['exit_status']))
            except KeyError:
                cosmos_jobinfo['exit_status'] = os.EX_UNAVAILABLE

        if cosmos_jobinfo['exit_status'] == 0:
            # Both fields claim success despite the error indicators.
            cosmos_jobinfo['exit_status'] = os.EX_SOFTWARE

        cosmos_jobinfo['successful'] = False
    else:
        cosmos_jobinfo['successful'] = True

    return cosmos_jobinfo
def _get_task_return_data(self, task):
    """
    Convert raw qacct job data into Cosmos's more portable format.

    Returns a 2-tuple comprising:
    [0] a dictionary of job metadata,
    [1] a boolean indicating whether the metadata in [0] are affected by an
        SGE bug that causes qacct to occasionally return corrupt results.
    """
    d = self.task_qacct(task)

    # The job counts as failed unless the 'failed' field starts with '0'.
    job_failed = d['failed'][0] != '0'
    data_are_corrupt = is_corrupt(d)

    if job_failed or data_are_corrupt:
        # Logger.warn() is a deprecated alias; warning() is the supported name.
        task.workflow.log.warning('%s SGE (qacct -j %s) reports %s:\n%s' %
                                  (task, task.drm_jobID,
                                   'corrupt data' if data_are_corrupt else 'job failure',
                                   json.dumps(d, indent=4, sort_keys=True)))

    processed_data = dict(
        # For a failed job the exit status is the run of digits at the start
        # of the 'failed' field, not the 'exit_status' field.
        exit_status=int(d['exit_status']) if not job_failed else int(re.search(r'^(\d+)', d['failed']).group(1)),

        percent_cpu=div(float(d['cpu']), float(d['ru_wallclock'])),
        wall_time=float(d['ru_wallclock']),

        cpu_time=float(d['cpu']),
        user_time=float(d['ru_utime']),
        system_time=float(d['ru_stime']),

        avg_rss_mem=d['ru_ixrss'],
        max_rss_mem_kb=convert_size_to_kb(d['maxrss']),
        avg_vms_mem_kb=None,
        max_vms_mem_kb=convert_size_to_kb(d['maxvmem']),

        io_read_count=int(d['ru_inblock']),
        io_write_count=int(d['ru_oublock']),
        io_wait=float(d['iow']),
        # The 'io' value is formatted with a "G" suffix before conversion.
        # NOTE(review): read and write sizes use the same value -- confirm.
        io_read_kb=convert_size_to_kb("%fG" % float(d['io'])),
        io_write_kb=convert_size_to_kb("%fG" % float(d['io'])),

        ctx_switch_voluntary=int(d['ru_nvcsw']),
        ctx_switch_involuntary=int(d['ru_nivcsw']),

        avg_num_threads=None,
        max_num_threads=None,

        avg_num_fds=None,
        max_num_fds=None,

        memory=float(d['mem']),
    )

    return processed_data, data_are_corrupt
def parse_sacct(job_info, log=None):
    """
    Parse one raw ``sacct`` record into a dict with numeric accounting fields.

    Works on a copy of ``job_info`` and returns it with ``exit_status``,
    ``cpu_time``, ``wall_time``, ``percent_cpu``, ``avg_rss_mem``,
    ``max_rss_mem``, ``avg_vms_mem`` and ``max_vms_mem`` added.

    :param job_info: mapping of sacct column names to raw string values. It
        is read for ``State``, ``ExitCode``, ``CPUTimeRAW``, ``Elapsed``,
        ``AveRSS``, ``MaxRSS``, ``AveVMSize`` and ``MaxVMSize``.
    :param log: optional logger. When given, the record being parsed is
        reported through ``log.info`` before a parsing error is re-raised.
    :returns: the augmented copy of ``job_info``.
    :raises Exception: any error raised while parsing is propagated unchanged.
    """
    # (output key, sacct column) pairs, in the order they are filled in.
    memory_fields = (
        ('avg_rss_mem', 'AveRSS'),
        ('max_rss_mem', 'MaxRSS'),
        ('avg_vms_mem', 'AveVMSize'),
        ('max_vms_mem', 'MaxVMSize'),
    )

    # Bound up front so the handler below can always log the record; a
    # failing ``job_info.copy()`` used to trigger UnboundLocalError there,
    # which hid the real error.
    job_info2 = job_info
    try:
        job_info2 = job_info.copy()

        if job_info2['State'] in FAILED_STATES + PENDING_STATES:
            # No exit code is reported for failed or pending jobs.
            job_info2['exit_status'] = None
        else:
            # ExitCode is split on ":" and only the first part is kept.
            job_info2['exit_status'] = int(job_info2['ExitCode'].split(":")[0])

        job_info2['cpu_time'] = int(job_info2['CPUTimeRAW'])
        job_info2['wall_time'] = parse_slurm_time(job_info2['Elapsed'])
        job_info2['percent_cpu'] = div(float(job_info2['cpu_time']),
                                       float(job_info2['wall_time']))

        for out_key, sacct_key in memory_fields:
            raw_size = job_info2[sacct_key]
            # An empty string means sacct reported no value for that field.
            job_info2[out_key] = convert_size_to_kb(raw_size) if raw_size != '' else None
    except Exception:
        if log:
            log.info('Error Parsing: %s' % pformat(job_info2))
        # Bare ``raise`` re-raises the active exception with its traceback.
        raise

    return job_info2