def line_to_dict(self, line, cfg=None):
    """
    Parse one PBS log file line into a python dict of job attributes.

    The line must consist of exactly four ';'-separated fields.  The third
    field is stored as ``jobid``; the fourth is a space-separated list of
    ``key=value`` pairs from which the remaining values are taken.

    ``cfg`` is an optional dict.  Two keys are read from it:
    ``ignore_project`` (a project name that is treated as "no project")
    and ``default_project`` (used when the job ends up with no project).

    Returns the dict of parsed values, or None when the line has no
    ``resources_used.walltime`` entry.

    Raises ValueError when the line does not split into four fields or a
    timestamp field is not an integer, and KeyError when one of the
    required keys is missing from the line.
    """
    if cfg is None:
        cfg = {}

    logger.debug('Parsing line:')
    logger.debug(line)

    # Split line into parts; only the job id and the raw key=value payload
    # are used, the first two fields are discarded.
    _, _, job_num, raw_data = line.split(';')

    # Build a dict from the key=value tokens.  Splitting on the FIRST '='
    # only keeps values that themselves contain '=' (for example
    # "Resource_List.nodes=1:ppn=8"); tokens without any '=' are skipped.
    # str(' ') keeps the separator a native string -- presumably for
    # Python 2 compatibility, so it is left as written.
    data = {}
    for token in raw_data.split(str(' ')):
        key, sep, value = token.partition('=')
        if not sep:
            continue
        data[key] = value

    # Check to see if the line is worth processing.
    if 'resources_used.walltime' not in data:
        return None

    formatted_data = {'jobid': job_num}
    formatted_data['user'] = data['user']

    # 'project' takes precedence over 'account'; a project equal to
    # cfg['ignore_project'] is treated as unset, and an unset project
    # falls back to cfg['default_project'].
    project = None
    if 'project' in data:
        project = data['project']
    elif 'account' in data:
        project = data['account']

    ignore_project = cfg.get('ignore_project', None)
    if ignore_project is not None and project == ignore_project:
        project = None

    if project is None:
        project = cfg.get('default_project', None)
    formatted_data['project'] = project

    formatted_data['jobname'] = data['jobname']
    formatted_data['group'] = data['group']

    formatted_data['act_wall_time'] = \
        get_in_seconds(data['resources_used.walltime'])

    if 'Resource_List.walltime' in data:
        formatted_data['est_wall_time'] = \
            get_in_seconds(data['Resource_List.walltime'])
    else:
        formatted_data['est_wall_time'] = None

    # exec_host is a '+'-separated list of "host/slot" entries.  Keep the
    # part before the first '/', so multi-digit slots ("node/10") do not
    # leave a trailing '/' behind as a fixed two-character strip would.
    formatted_data['exec_hosts'] = \
        [entry.split('/', 1)[0] for entry in data['exec_host'].split('+')]

    # Prefer the reported cpu count; otherwise count the "host/slot"
    # entries in exec_host.
    if 'resources_used.ncpus' in data:
        cores = int(data['resources_used.ncpus'])
    else:
        cores = data['exec_host'].count('/')
    formatted_data['cores'] = cores
    formatted_data['cpu_usage'] = cores * formatted_data['act_wall_time']

    formatted_data['queue'] = data['queue']

    # Missing memory values default to '0kb' before conversion.
    formatted_data['mem'] = \
        get_mem_in_kb(data.get('resources_used.mem', '0kb'))
    formatted_data['vmem'] = \
        get_mem_in_kb(data.get('resources_used.vmem', '0kb'))
    for name in ('pmem', 'mem', 'vmem', 'pvmem'):
        formatted_data['list_' + name] = \
            get_mem_in_mb(data.get('Resource_List.' + name, '0kb'))

    formatted_data['exit_status'] = data['Exit_status']

    # Timestamps are integer epoch seconds, rendered as
    # "YYYY-MM-DD HH:MM:SS" by datetime.fromtimestamp().isoformat().
    fromtimestamp = datetime.datetime.fromtimestamp
    for field in ('ctime', 'qtime', 'etime', 'start'):
        formatted_data[field] = \
            fromtimestamp(int(data[field])).isoformat(str(' '))

    logger.debug("Parsed following data")
    for k, v in formatted_data.items():
        logger.debug("%s = %s", k, v)

    return formatted_data
def pbs_to_dict(line):
    """
    Parse one PBS log file line into a python dict of job attributes.

    The line must consist of exactly four ';'-separated fields.  The third
    field is stored as ``jobid``; the fourth is a space-separated list of
    ``key=value`` pairs from which the remaining values are taken.  The
    ``project`` key is only present in the result when the line has an
    ``account`` entry.

    Raises KeyError when the line has no ``resources_used.walltime`` entry
    or another required key is missing.

    Raises ValueError when the line does not split into four fields, when
    a wall-time value cannot be converted by ``get_in_seconds`` (the
    original note on this function mentions times over 3 years -- that
    check is not visible here), or when a timestamp is not an integer.
    """
    logging.debug('Parsing line:')
    logging.debug(line)

    # Split line into parts; only the job id and the raw key=value payload
    # are used, the first two fields are discarded.
    _, _, job_num, raw_data = line.split(';')

    # Build a dict from the key=value tokens.  Splitting on the FIRST '='
    # only keeps values that themselves contain '=' (for example
    # "Resource_List.nodes=1:ppn=8"); tokens without any '=' are skipped.
    data = {}
    for token in raw_data.split(' '):
        key, sep, value = token.partition('=')
        if not sep:
            continue
        data[key] = value

    # Check to see if the line is worth processing.
    if 'resources_used.walltime' not in data:
        raise KeyError('resources_used.walltime')

    formatted_data = {'jobid': job_num}
    formatted_data['user'] = data['user']
    if 'account' in data:
        formatted_data['project'] = data['account']
    formatted_data['jobname'] = data['jobname']
    formatted_data['group'] = data['group']

    # Look the raw values up outside the try blocks so that a missing key
    # surfaces directly as KeyError, and only conversion failures are
    # reported as ValueError.
    act_raw = data['resources_used.walltime']
    try:
        formatted_data['act_wall_time'] = get_in_seconds(act_raw)
    except Exception:
        logging.error('Failed to parse act_wall_time value: %s', act_raw)
        raise ValueError(
            'Failed to parse act_wall_time value: %s' % act_raw)

    est_raw = data['Resource_List.walltime']
    try:
        formatted_data['est_wall_time'] = get_in_seconds(est_raw)
    except Exception:
        logging.error('Failed to parse est_wall_time value: %s', est_raw)
        raise ValueError(
            'Failed to parse est_wall_time value: %s' % est_raw)

    # exec_host is a '+'-separated list of "host/slot" entries.  Keep the
    # part before the first '/', so multi-digit slots ("node/10") do not
    # leave a trailing '/' behind as a fixed two-character strip would.
    exec_host = data['exec_host']
    formatted_data['exec_hosts'] = \
        [entry.split('/', 1)[0] for entry in exec_host.split('+')]

    # One "host/slot" entry per core.
    cores = exec_host.count('/')
    formatted_data['cores'] = cores
    formatted_data['cpu_usage'] = cores * formatted_data['act_wall_time']

    formatted_data['queue'] = data['queue']

    # Missing memory values default to '0kb' before conversion.
    formatted_data['mem'] = \
        get_mem_in_kb(data.get('resources_used.mem', '0kb'))
    formatted_data['vmem'] = \
        get_mem_in_kb(data.get('resources_used.vmem', '0kb'))
    for name in ('pmem', 'mem', 'vmem', 'pvmem'):
        formatted_data['list_' + name] = \
            get_mem_in_mb(data.get('Resource_List.' + name, '0kb'))

    formatted_data['exit_status'] = data['Exit_status']

    # Timestamps are integer epoch seconds, rendered as
    # "YYYY-MM-DD HH:MM:SS" by datetime.fromtimestamp().isoformat().
    fromtimestamp = datetime.datetime.fromtimestamp
    for field in ('ctime', 'qtime', 'etime', 'start'):
        formatted_data[field] = \
            fromtimestamp(int(data[field])).isoformat(' ')

    logging.debug("Parsed following data")
    for k, v in formatted_data.items():
        logging.debug("%s = %s", k, v)

    return formatted_data