def line_to_dict(self, line, cfg=None):
        """
        Parses a PBS log file line into a python dict

        The line must consist of exactly four ';' separated fields. The
        third field is stored as the job id and the fourth is read as a
        list of space separated key=value items; the first two fields are
        not used.

        line -- one line of the log file
        cfg -- optional dict; 'ignore_project' names a project that is
            treated as if the line had none, 'default_project' supplies
            the project used when the line has none

        returns a dict of the parsed values, or None when the line has no
        'resources_used.walltime' item

        raises ValueError when line does not have four ';' separated
            fields, or when a numeric item can not be converted by int()
        raises KeyError when a required item is missing from the line

        """
        if cfg is None:
            cfg = {}

        logger.debug('Parsing line:')
        logger.debug(line)
        # Split line into parts, only care about job number and raw_data
        _date, _unused, job_num, raw_data = line.split(';')

        # Make into a dict using values key=value. Only the first '=' is
        # taken as the separator, so a value that itself contains '=' is
        # kept instead of being dropped; items without any '=' are skipped.
        # NOTE(review): str(' ') is kept from the original, presumably to
        # force a native string under unicode_literals - confirm.
        data = {}
        for item in raw_data.split(str(' ')):
            key, sep, value = item.partition('=')
            if sep:
                data[key] = value

        # Check to see if line worth processing
        if 'resources_used.walltime' not in data:
            return None

        formatted_data = {}
        formatted_data['jobid'] = job_num
        formatted_data['user'] = data['user']

        # 'project' takes precedence over 'account'
        project = data.get('project', data.get('account'))

        ignore_project = cfg.get('ignore_project', None)
        if ignore_project is not None and project == ignore_project:
            project = None

        if project is None:
            project = cfg.get('default_project', None)
        formatted_data['project'] = project

        formatted_data['jobname'] = data['jobname']
        formatted_data['group'] = data['group']

        formatted_data['act_wall_time'] = \
            get_in_seconds(data['resources_used.walltime'])

        # The requested wall time is optional
        est_wall_time = data.get('Resource_List.walltime')
        if est_wall_time is not None:
            est_wall_time = get_in_seconds(est_wall_time)
        formatted_data['est_wall_time'] = est_wall_time

        exec_host = data['exec_host']
        # Keep only what precedes the '/' in each '+' separated entry.
        # Cutting a fixed two characters off the end only works while the
        # part after the host is a single character ('node/10' would
        # become 'node/').
        formatted_data['exec_hosts'] = \
            [entry.split('/')[0] for entry in exec_host.split('+')]

        # Prefer the reported cpu count, otherwise count the '/' in
        # exec_host
        if 'resources_used.ncpus' in data:
            cores = int(data['resources_used.ncpus'])
        else:
            cores = exec_host.count('/')
        formatted_data['cores'] = cores
        formatted_data['cpu_usage'] = cores * formatted_data['act_wall_time']

        formatted_data['queue'] = data['queue']

        # Memory values default to '0kb' when the line does not have them
        formatted_data['mem'] = \
            get_mem_in_kb(data.get('resources_used.mem', '0kb'))
        formatted_data['vmem'] = \
            get_mem_in_kb(data.get('resources_used.vmem', '0kb'))

        formatted_data['list_pmem'] = \
            get_mem_in_mb(data.get('Resource_List.pmem', '0kb'))
        formatted_data['list_mem'] = \
            get_mem_in_mb(data.get('Resource_List.mem', '0kb'))

        formatted_data['list_vmem'] = \
            get_mem_in_mb(data.get('Resource_List.vmem', '0kb'))
        formatted_data['list_pvmem'] = \
            get_mem_in_mb(data.get('Resource_List.pvmem', '0kb'))

        formatted_data['exit_status'] = data['Exit_status']

        # Integer timestamps become 'YYYY-MM-DD HH:MM:SS' strings
        fromtimestamp = datetime.datetime.fromtimestamp
        for field in ('ctime', 'qtime', 'etime', 'start'):
            formatted_data[field] = \
                fromtimestamp(int(data[field])).isoformat(str(' '))

        logger.debug("Parsed following data")
        for k, v in formatted_data.items():
            logger.debug("%s = %s", k, v)

        return formatted_data
Exemple #2
0
def pbs_to_dict(line):
    """
    Parses a PBS log file line into a python dict

    The line must consist of exactly four ';' separated fields. The third
    field is stored as the job id and the fourth is read as a list of
    space separated key=value items; the first two fields are not used.

    returns a dict of the parsed values; 'project' is only present when
    the line has an 'account' item

    raises KeyError when line not valid, i.e. it has no
        'resources_used.walltime' item or another required item is missing
    raises ValueError when line does not have four ';' separated fields
        or a wall time can not be parsed
        NOTE(review): the original note said "when time over 3 years";
        that check is presumably inside get_in_seconds - confirm.

    """
    logging.debug('Parsing line:')
    logging.debug(line)
    # Split line into parts, only care about job number and raw_data
    _date, _unused, job_num, raw_data = line.split(';')

    # Make into a dict using values key=value. Only the first '=' is
    # taken as the separator, so a value that itself contains '=' is kept
    # instead of being dropped; items without any '=' are skipped.
    data = {}
    for item in raw_data.split(' '):
        key, sep, value = item.partition('=')
        if sep:
            data[key] = value

    # Check to see if line worth processing
    if 'resources_used.walltime' not in data:
        raise KeyError('resources_used.walltime')

    formatted_data = {}
    formatted_data['jobid'] = job_num
    formatted_data['user'] = data['user']
    if 'account' in data:
        formatted_data['project'] = data['account']

    formatted_data['jobname'] = data['jobname']
    formatted_data['group'] = data['group']

    act_wall_time = data['resources_used.walltime']
    try:
        formatted_data['act_wall_time'] = get_in_seconds(act_wall_time)
    except Exception:
        logging.error('Failed to parse act_wall_time value: %s',
                      act_wall_time)
        raise ValueError(
            'Failed to parse act_wall_time value: %s' % act_wall_time)

    # Look the value up before the try block: a missing item is reported
    # as a plain KeyError rather than failing again inside the error
    # handler while it builds its log message.
    est_wall_time = data['Resource_List.walltime']
    try:
        formatted_data['est_wall_time'] = get_in_seconds(est_wall_time)
    except Exception:
        logging.error('Failed to parse est_wall_time value: %s',
                      est_wall_time)
        raise ValueError(
            'Failed to parse est_wall_time value: %s' % est_wall_time)

    exec_host = data['exec_host']
    # Keep only what precedes the '/' in each '+' separated entry. Cutting
    # a fixed two characters off the end only works while the part after
    # the host is a single character ('node/10' would become 'node/').
    formatted_data['exec_hosts'] = \
        [entry.split('/')[0] for entry in exec_host.split('+')]
    cores = exec_host.count('/')
    formatted_data['cores'] = cores
    formatted_data['cpu_usage'] = cores * formatted_data['act_wall_time']

    formatted_data['queue'] = data['queue']

    # Memory values default to '0kb' when the line does not have them
    formatted_data['mem'] = \
        get_mem_in_kb(data.get('resources_used.mem', '0kb'))
    formatted_data['vmem'] = \
        get_mem_in_kb(data.get('resources_used.vmem', '0kb'))

    formatted_data['list_pmem'] = \
        get_mem_in_mb(data.get('Resource_List.pmem', '0kb'))
    formatted_data['list_mem'] = \
        get_mem_in_mb(data.get('Resource_List.mem', '0kb'))

    formatted_data['list_vmem'] = \
        get_mem_in_mb(data.get('Resource_List.vmem', '0kb'))
    formatted_data['list_pvmem'] = \
        get_mem_in_mb(data.get('Resource_List.pvmem', '0kb'))

    formatted_data['exit_status'] = data['Exit_status']

    # Integer timestamps become 'YYYY-MM-DD HH:MM:SS' strings
    fromtimestamp = datetime.datetime.fromtimestamp
    for field in ('ctime', 'qtime', 'etime', 'start'):
        formatted_data[field] = \
            fromtimestamp(int(data[field])).isoformat(' ')

    logging.debug("Parsed following data")
    for k, v in formatted_data.items():
        logging.debug("%s = %s", k, v)

    return formatted_data
Exemple #3
0
    def line_to_dict(self, line, cfg=None):
        """
        Parses a PBS log file line into a python dict

        The line must consist of exactly four ';' separated fields. The
        third field is stored as the job id and the fourth is read as a
        list of space separated key=value items; the first two fields are
        not used.

        line -- one line of the log file
        cfg -- optional dict; 'ignore_project' names a project that is
            treated as if the line had none, 'default_project' supplies
            the project used when the line has none

        returns a dict of the parsed values, or None when the line has no
        'resources_used.walltime' item

        raises ValueError when line does not have four ';' separated
            fields, or when a numeric item can not be converted by int()
        raises KeyError when a required item is missing from the line

        """
        if cfg is None:
            cfg = {}

        logger.debug('Parsing line:')
        logger.debug(line)
        # Split line into parts, only care about job number and raw_data
        _date, _unused, job_num, raw_data = line.split(';')

        # Make into a dict using values key=value. Only the first '=' is
        # taken as the separator, so a value that itself contains '=' is
        # kept instead of being dropped; items without any '=' are skipped.
        # NOTE(review): str(' ') is kept from the original, presumably to
        # force a native string under unicode_literals - confirm.
        data = {}
        for item in raw_data.split(str(' ')):
            key, sep, value = item.partition('=')
            if sep:
                data[key] = value

        # Check to see if line worth processing
        if 'resources_used.walltime' not in data:
            return None

        formatted_data = {}
        formatted_data['jobid'] = job_num
        formatted_data['user'] = data['user']

        # 'project' takes precedence over 'account'
        project = data.get('project', data.get('account'))

        ignore_project = cfg.get('ignore_project', None)
        if ignore_project is not None and project == ignore_project:
            project = None

        if project is None:
            project = cfg.get('default_project', None)
        formatted_data['project'] = project

        formatted_data['jobname'] = data['jobname']
        formatted_data['group'] = data['group']

        formatted_data['act_wall_time'] = \
            get_in_seconds(data['resources_used.walltime'])

        # The requested wall time is optional
        est_wall_time = data.get('Resource_List.walltime')
        if est_wall_time is not None:
            est_wall_time = get_in_seconds(est_wall_time)
        formatted_data['est_wall_time'] = est_wall_time

        exec_host = data['exec_host']
        # Keep only what precedes the '/' in each '+' separated entry.
        # Cutting a fixed two characters off the end only works while the
        # part after the host is a single character ('node/10' would
        # become 'node/').
        formatted_data['exec_hosts'] = \
            [entry.split('/')[0] for entry in exec_host.split('+')]

        # Prefer the reported cpu count, otherwise count the '/' in
        # exec_host
        if 'resources_used.ncpus' in data:
            cores = int(data['resources_used.ncpus'])
        else:
            cores = exec_host.count('/')
        formatted_data['cores'] = cores
        formatted_data['cpu_usage'] = cores * formatted_data['act_wall_time']

        formatted_data['queue'] = data['queue']

        # Memory values default to '0kb' when the line does not have them
        formatted_data['mem'] = \
            get_mem_in_kb(data.get('resources_used.mem', '0kb'))
        formatted_data['vmem'] = \
            get_mem_in_kb(data.get('resources_used.vmem', '0kb'))

        formatted_data['list_pmem'] = \
            get_mem_in_mb(data.get('Resource_List.pmem', '0kb'))
        formatted_data['list_mem'] = \
            get_mem_in_mb(data.get('Resource_List.mem', '0kb'))

        formatted_data['list_vmem'] = \
            get_mem_in_mb(data.get('Resource_List.vmem', '0kb'))
        formatted_data['list_pvmem'] = \
            get_mem_in_mb(data.get('Resource_List.pvmem', '0kb'))

        formatted_data['exit_status'] = data['Exit_status']

        # Integer timestamps become 'YYYY-MM-DD HH:MM:SS' strings
        fromtimestamp = datetime.datetime.fromtimestamp
        for field in ('ctime', 'qtime', 'etime', 'start'):
            formatted_data[field] = \
                fromtimestamp(int(data[field])).isoformat(str(' '))

        logger.debug("Parsed following data")
        for k, v in formatted_data.items():
            logger.debug("%s = %s", k, v)

        return formatted_data