Beispiel #1
0
def fixExitStatusLogLine(line,lineno):
    """
    One-time helper: copy ``Exit_status`` from an accounting line to its job.

    The line is expected to look like ``date;event;fulljobid;attrs`` where
    ``attrs`` is a whitespace separated list of ``key=value`` tokens.
    The job is fetched (or created) for every line that can be parsed; it is
    modified and saved only when the line carries an ``Exit_status``
    attribute.

    line   -- one raw line of the accounting log
    lineno -- number of that line, used only in log messages

    Malformed lines and unparsable job ids are logged and skipped.
    """
    try:
        date, event, fulljobid, attrs = line.split(';')
    except ValueError:
        log(LOG_WARNING, "skipping invalid line %d: '%s'" % (lineno,line))
        return

    log(LOG_DEBUG, "processing accounting line: %s:%s:%s ..." %(date, event, fulljobid))

    # Parsing stops at the first token without '='; attributes collected
    # before that token are still used below.
    attrdir = {}
    try:
        for token in attrs.split():
            key, val = token.split('=', 1)
            attrdir[key] = val
    except ValueError:
        log(LOG_WARNING, "skipping attributes parsing (line no %d has invalid attributes): '%s'" % (lineno,attrs))

    # search() returns None when the job id does not match; skip the line
    # instead of failing with AttributeError on None.groups().
    match = JOBID_REGEX.search(fulljobid)
    if match is None:
        log(LOG_WARNING, "skipping line %d with unparsable job id: '%s'" % (lineno,fulljobid))
        return
    jobid_name, server_name = match.groups()

    server, _created = getBatchServer(server_name)
    job, _created = Job.objects.get_or_create(jobid=jobid_name, server=server)
    if 'Exit_status' in attrdir:
        job.exit_status = int(attrdir['Exit_status'])
        job.save()
Beispiel #2
0
# Accounting record type -> job state code handed to getJobState().
_ACC_EVENT_STATE_CODES = {
    'Q': 'Q',
    'S': 'R',
    'R': 'R',
    'C': 'R',
    'T': 'R',
    'E': 'C',
    'D': 'D',
    'A': 'A',
    'G': 'D',
}

# Accounting attribute holding a Unix timestamp -> job attribute receiving it.
_ACC_TIMESTAMP_FIELDS = (
    ('ctime', 'ctime'),
    ('mtime', 'mtime'),
    ('qtime', 'qtime'),
    ('etime', 'etime'),
    ('start', 'start_time'),
    ('end', 'comp_time'),
)


def _acc_hms_to_seconds(hms):
    """Convert an ``H:M:S`` string to a number of seconds."""
    hours, minutes, seconds = hms.split(":")
    return (int(hours)*60+int(minutes))*60+int(seconds)


def _acc_expand_exec_hosts(exec_host):
    """
    Split an ``exec_host`` value on '+' into a list of 'node/slot' entries.

    PBSPro records like 'node1/0*2' are converted to the more generic
    'node1/0', 'node1/1' form; entries without '*' are passed through.
    """
    expanded = []
    for entry in exec_host.split('+'):
        if '*' in entry:
            first_slot, numslots = entry.split('*')
            host_name = first_slot.split('/')[0]
            converted = [ "%s/%d" % (host_name, i) for i in range(0,int(numslots)) ]
            log(LOG_DEBUG, "Exec_host %s converted to %s" % (entry,converted))
            expanded.extend(converted)
        else:
            expanded.append(entry)
    return expanded


def parseOneLogLine(line,lineno):
    """
    Parse one line from accounting log and insert the data into DB.

    The line is expected to look like ``date;event;fulljobid;attrs`` where
    ``attrs`` is a whitespace separated list of ``key=value`` tokens.
    The function fills an SQLJob from the attributes, saves it, and then
    inserts a row into ``trqacc_accountingevent`` for the event.

    line   -- one raw line of the accounting log
    lineno -- number of that line, used only in log messages

    Nothing is returned.  Malformed lines, licensing ('L') records,
    unparsable job ids and unknown event types are logged and skipped.
    """
    try:
        date, event, fulljobid, attrs = line.split(';')
    except ValueError:
        log(LOG_WARNING, "skipping invalid line %d: '%s'" % (lineno,line))
        return

    log(LOG_DEBUG, "processing accounting line: %s:%s:%s ..." %(date, event, fulljobid))
    # We ignore PBSPro Licensing lines (it is not job related)
    if event == 'L':
        log(LOG_DEBUG, "ignored licensing line")
        return

    # Parsing stops at the first token without '='; attributes collected
    # before that token are still used below.
    attrdir = {}
    try:
        for token in attrs.split():
            key, val = token.split('=', 1)
            attrdir[key] = val
    except ValueError:
        log(LOG_WARNING, "skipping line with invalid attribues %d: '%s'" % (lineno,attrs))

    # search() returns None when the job id does not match; skip the line
    # instead of failing with AttributeError on None.groups().
    match = JOBID_REGEX.search(fulljobid)
    if match is None:
        log(LOG_WARNING, "skipping line %d with unparsable job id: '%s'" % (lineno,fulljobid))
        return
    jobid_name, server_name = match.groups()

    server, created = getBatchServer(server_name)
    if created:
        log(LOG_INFO, "new server will be created: %s" % server_name)

    job = SQLJob()
    job.jobid = jobid_name
    job.server_id = server.id
    job.refresh_id_jobstate_id()

    # 'requestor' is handled after 'owner', so it wins when both are present.
    for host_attr in ('owner', 'requestor'):
        if host_attr in attrdir:
            shname = attrdir[host_attr].split('@')[1]
            submithost, created = getSubmitHost(shname)
            if created:
                log(LOG_INFO, "new submit host will be created: %s" % shname)
            job.submithost_id = submithost.pk

    # Stays None when the record has no 'group' attribute.  Without this
    # default a record with 'user' but no 'group' hit an unbound local.
    group = None
    if 'group' in attrdir:
        group, created = getGroup(attrdir['group'], server)
        if created:
            log(LOG_INFO, "new group will be created: %s" % attrdir['group'])

    if 'user' in attrdir:
        # NOTE(review): getUser() receives group=None when the record has no
        # 'group' attribute -- confirm that helper accepts it.
        user, created = getUser(attrdir['user'], server, group)
        if created:
            log(LOG_INFO, "new user will be created: %s" % attrdir['user'])
        job.job_owner_id = user.id
        # TODO: convert this to SQL as well
        if group is not None:
            user.group = group

    has_cput = 'resources_used.cput' in attrdir
    has_walltime = 'resources_used.walltime' in attrdir
    if has_cput:
        job.cput = _acc_hms_to_seconds(attrdir['resources_used.cput'])
    if has_walltime:
        job.walltime = _acc_hms_to_seconds(attrdir['resources_used.walltime'])
    if has_cput and has_walltime:
        # Efficiency is cpu time as a percentage of wall time.
        if job.walltime != 0:
            job.efficiency = 100*job.cput/job.walltime
        else:
            job.efficiency = 0

    if 'Exit_status' in attrdir:
        job.exit_status = int(attrdir['Exit_status'])

    state_code = _ACC_EVENT_STATE_CODES.get(event)
    if state_code is None:
        log(LOG_ERROR, "Unknown event type in accounting log file: %s" % line)
        return
    new_state = getJobState(state_code)

    # A job already in state 'C' keeps that state.
    # NOTE: the RunningJob bookkeeping that used to live here is disabled.
    if job.job_state_id != getJobState('C').id:
        job.job_state_id = new_state.id
    else:
        log(LOG_INFO, "Job %s.%s is already finished, not changing the state." % (job.jobid,server.name))

    if 'queue' in attrdir:
        queue, created = getQueue(attrdir['queue'], server)
        if created:
            log(LOG_INFO, "new queue will be created: %s" % attrdir['queue'])
        job.queue_id = queue.id

    for attr_name, job_field in _ACC_TIMESTAMP_FIELDS:
        if attr_name in attrdir:
            setattr(job, job_field, datetime.datetime.fromtimestamp(int(attrdir[attr_name])))

    if 'exec_host' in attrdir:
        job.jobslots = []
        for host_slot in _acc_expand_exec_hosts(attrdir['exec_host']):
            name, slotstr = host_slot.split('/')
            slot = int(slotstr)
            node, created = getNode(name, server)
            if created:
                log(LOG_INFO, "new node will be created: node name: %s" % (name))
                node.save()
            js, created = getJobSlot(slot=slot,node=node)
            if created:
                log(LOG_INFO, "new jobslot will be created: slot: %d, node name: %s" % (slot,name))
                js.save()
            job.jobslots.append(js.id)
    job.save()

    # An id of -1 after save() means the id is not loaded yet; refresh it
    # so the accounting event row below references the job.
    if job.id == -1:
        job.refresh_id_jobstate_id()

    # The log date is 'MM/DD/YYYY HH:MM:SS'; reorder it to 'YYYY-MM-DD HH:MM:SS'.
    day_part, time_part = date.split(' ')
    month, day, year = day_part.split('/')
    timestamp = '%s-%s-%s %s' % (year,month,day,time_part)

    # The cursor is acquired only here so skipped lines never open one.
    cursor = connection.cursor()
    cursor.execute("INSERT IGNORE INTO trqacc_accountingevent (timestamp, type, job_id) VALUES (%s,%s,%s)", [timestamp, event, job.id])