Example #1
0
def pandalog(message):
    """
    Send a message to the central PanDA HTTP logger (best effort).

    https://github.com/PanDAWMS/panda-jedi/blob/master/pandajedi/jediorder/JobGenerator.py#L405

    :param message: text to record under the 'retryModule' type
    """
    tmpPandaLogger = None
    try:
        # get logger and lock it
        tmpPandaLogger = PandaLogger()
        tmpPandaLogger.lock()
        # set category (usually prod) and type
        tmpPandaLogger.setParams({'Type':'retryModule'})
        tmpLogger = tmpPandaLogger.getHttpLogger(panda_config.loggername)
        # send the message
        tmpLogger.debug(message)
    except Exception as e:
        _logger.warning("Could not upload message (%s) to pandamon logger. (Error: %s)"%(message, e))
    finally:
        # always release the lock, even when sending failed; otherwise the
        # logger stays locked after an error in setParams/getHttpLogger
        if tmpPandaLogger is not None:
            try:
                tmpPandaLogger.release()
            except Exception:
                pass
def updateJob(req,
              jobId,
              state,
              token=None,
              transExitCode=None,
              pilotErrorCode=None,
              pilotErrorDiag=None,
              timestamp=None,
              timeout=60,
              xml='',
              node=None,
              workdir=None,
              cpuConsumptionTime=None,
              cpuConsumptionUnit=None,
              remainingSpace=None,
              schedulerID=None,
              pilotID=None,
              siteName=None,
              messageLevel=None,
              pilotLog='',
              metaData='',
              cpuConversionFactor=None,
              exeErrorCode=None,
              exeErrorDiag=None,
              pilotTiming=None,
              computingElement=None,
              startTime=None,
              endTime=None,
              nEvents=None,
              nInputFiles=None,
              batchID=None,
              attemptNr=None,
              jobMetrics=None,
              stdout='',
              jobSubStatus=None,
              coreCount=None,
              maxRSS=None,
              maxVMEM=None,
              maxSWAP=None,
              maxPSS=None,
              avgRSS=None,
              avgVMEM=None,
              avgSWAP=None,
              avgPSS=None):
    """
    Record a pilot's job-state update.

    Validates the caller's production role and token, optionally uploads the
    pilot log to the PanDA HTTP logger, collects all non-None metric arguments
    into a parameter map, and forwards everything to jobDispatcher.updateJob.

    Returns an encoded Protocol.Response (error codes for invalid role/token,
    success for 'NULL' jobId or unknown state, otherwise the dispatcher result).
    """
    tmpLog = LogWrapper(
        _logger, 'updateJob PandaID={0} PID={1}'.format(jobId, os.getpid()))
    tmpLog.debug('start')
    # get DN
    realDN = _getDN(req)
    # get FQANs
    fqans = _getFQAN(req)
    # check production role
    prodManager = _checkRole(fqans,
                             realDN,
                             jobDispatcher,
                             site=siteName,
                             hostname=req.get_remote_host())
    # check token
    validToken = _checkToken(token, jobDispatcher)
    # accept json
    acceptJson = req.acceptJson()
    _logger.debug(
        "updateJob(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,attemptNr:%s,jobSubStatus:%s,core:%s,DN:%s,role:%s,token:%s,val:%s,FQAN:%s,maxRSS=%s,maxVMEM=%s,maxSWAP=%s,maxPSS=%s,avgRSS=%s,avgVMEM=%s,avgSWAP=%s,avgPSS=%s\n==XML==\n%s\n==LOG==\n%s\n==Meta==\n%s\n==Metrics==\n%s\n==stdout==\n%s)"
        % (jobId, state, transExitCode, pilotErrorCode, pilotErrorDiag, node,
           workdir, cpuConsumptionTime, cpuConsumptionUnit, remainingSpace,
           schedulerID, pilotID, siteName, messageLevel, nEvents, nInputFiles,
           cpuConversionFactor, exeErrorCode, exeErrorDiag, pilotTiming,
           computingElement, startTime, endTime, batchID, attemptNr,
           jobSubStatus, coreCount, realDN, prodManager, token, validToken,
           str(fqans), maxRSS, maxVMEM, maxSWAP, maxPSS, avgRSS, avgVMEM,
           avgSWAP, avgPSS, xml, pilotLog, metaData, jobMetrics, stdout))
    _pilotReqLogger.info('method=updateJob,site=%s,node=%s,type=None' %
                         (siteName, node))
    # invalid role
    if not prodManager:
        _logger.warning("updateJob(%s) : invalid role" % jobId)
        return Protocol.Response(Protocol.SC_Role).encode(acceptJson)
    # invalid token
    if not validToken:
        _logger.warning("updateJob(%s) : invalid token" % jobId)
        return Protocol.Response(Protocol.SC_Invalid).encode(acceptJson)
    # aborting message
    if jobId == 'NULL':
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # check status
    if state not in ('running', 'failed', 'finished', 'holding', 'starting',
                     'transferring'):
        _logger.warning("invalid state=%s for updateJob" % state)
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # pilot log: best-effort upload to the PanDA HTTP logger
    tmpLog.debug('sending log')
    if pilotLog != '':
        _pandaLogger = None
        try:
            # make message
            message = pilotLog
            # get logger
            _pandaLogger = PandaLogger()
            _pandaLogger.lock()
            _pandaLogger.setParams({'Type': 'pilotLog', 'PandaID': int(jobId)})
            logger = _pandaLogger.getHttpLogger(panda_config.loggername)
            # add message
            logger.info(message)
        except Exception:
            tmpLog.debug('failed to send log')
        finally:
            tmpLog.debug('release lock')
            try:
                # release HTTP handler (guard against PandaLogger() having failed)
                if _pandaLogger is not None:
                    _pandaLogger.release()
            except Exception:
                pass
    tmpLog.debug('done log')
    # create parameter map; each entry is (value, DB column, max length or None)
    param = {}
    simpleParams = [
        (cpuConsumptionTime, 'cpuConsumptionTime', None),
        (cpuConsumptionUnit, 'cpuConsumptionUnit', None),
        (node, 'modificationHost', 128),
        (transExitCode, 'transExitCode', None),
        (pilotErrorCode, 'pilotErrorCode', None),
        (pilotErrorDiag, 'pilotErrorDiag', 500),
        (jobMetrics, 'jobMetrics', 500),
        (schedulerID, 'schedulerID', None),
        (pilotID, 'pilotID', 200),
        (batchID, 'batchID', 80),
        (exeErrorCode, 'exeErrorCode', None),
        (exeErrorDiag, 'exeErrorDiag', 500),
        (cpuConversionFactor, 'cpuConversion', None),
        (pilotTiming, 'pilotTiming', None),
        (computingElement, 'computingElement', None),
        (nEvents, 'nEvents', None),
        (nInputFiles, 'nInputFiles', None),
        (maxRSS, 'maxRSS', None),
        (maxVMEM, 'maxVMEM', None),
        (maxSWAP, 'maxSWAP', None),
        (maxPSS, 'maxPSS', None),
        (avgRSS, 'avgRSS', None),
        (avgVMEM, 'avgVMEM', None),
        (avgSWAP, 'avgSWAP', None),
        (avgPSS, 'avgPSS', None),
    ]
    for value, key, maxLen in simpleParams:
        if value is not None:
            param[key] = value if maxLen is None else value[:maxLen]
    # empty string means "not provided" for these two
    if jobSubStatus not in (None, ''):
        param['jobSubStatus'] = jobSubStatus
    if coreCount not in (None, ''):
        param['actualCoreCount'] = coreCount
    # timestamps arrive as '%Y-%m-%d %H:%M:%S'; unparsable values are ignored
    if startTime is not None:
        try:
            param['startTime'] = datetime.datetime(
                *time.strptime(startTime, '%Y-%m-%d %H:%M:%S')[:6])
        except Exception:
            pass
    if endTime is not None:
        try:
            param['endTime'] = datetime.datetime(
                *time.strptime(endTime, '%Y-%m-%d %H:%M:%S')[:6])
        except Exception:
            pass
    # attempt number must be an int; drop it otherwise
    if attemptNr is not None:
        try:
            attemptNr = int(attemptNr)
        except Exception:
            attemptNr = None
    if stdout != '':
        stdout = stdout[:2048]
    # invoke JD
    tmpLog.debug('executing')
    return jobDispatcher.updateJob(int(jobId), state, int(timeout), xml,
                                   siteName, param, metaData, attemptNr,
                                   stdout, acceptJson)
def updateJob(req,jobId,state,token=None,transExitCode=None,pilotErrorCode=None,pilotErrorDiag=None,timestamp=None,timeout=60,
              xml='',node=None,workdir=None,cpuConsumptionTime=None,cpuConsumptionUnit=None,remainingSpace=None,
              schedulerID=None,pilotID=None,siteName=None,messageLevel=None,pilotLog='',metaData='',
              cpuConversionFactor=None,exeErrorCode=None,exeErrorDiag=None,pilotTiming=None,computingElement=None,
              startTime=None,endTime=None,nEvents=None,nInputFiles=None,batchID=None,attemptNr=None,jobMetrics=None,
              stdout='',jobSubStatus=None,coreCount=None):
    """
    Record a pilot's job-state update (legacy variant).

    Validates the caller's production role and token, optionally uploads the
    pilot log to the PanDA HTTP logger, collects the non-None metric arguments
    into a parameter map, and forwards everything to jobDispatcher.updateJob.
    Returns an encoded Protocol.Response.

    NOTE(review): this is a second definition of updateJob in this file; if
    both live in the same module the later one wins — confirm these are
    separate snippets. Compared to the other variant it lacks the max*/avg*
    memory metrics, does not truncate batchID, and encodes responses without
    the acceptJson flag.
    """
    _logger.debug("updateJob(%s)" % jobId)
    # get DN
    realDN = _getDN(req)
    # get FQANs
    fqans = _getFQAN(req)
    # check production role
    prodManager = _checkRole(fqans,realDN,jobDispatcher,site=siteName,hostname=req.get_remote_host())
    # check token
    validToken = _checkToken(token,jobDispatcher)
    # dump the full request (including XML/log/meta payloads) for debugging
    _logger.debug("updateJob(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,attemptNr:%s,jobSubStatus:%s,core:%s,DN:%s,role:%s,token:%s,val:%s,FQAN:%s\n==XML==\n%s\n==LOG==\n%s\n==Meta==\n%s\n==Metrics==\n%s\n==stdout==\n%s)" %
                  (jobId,state,transExitCode,pilotErrorCode,pilotErrorDiag,node,workdir,cpuConsumptionTime,
                   cpuConsumptionUnit,remainingSpace,schedulerID,pilotID,siteName,messageLevel,nEvents,nInputFiles,
                   cpuConversionFactor,exeErrorCode,exeErrorDiag,pilotTiming,computingElement,startTime,endTime,
                   batchID,attemptNr,jobSubStatus,coreCount,realDN,prodManager,token,validToken,str(fqans),xml,pilotLog,metaData,jobMetrics,
                   stdout))
    _pilotReqLogger.info('method=updateJob,site=%s,node=%s,type=None' % (siteName,node))
    # invalid role
    if not prodManager:
        _logger.warning("updateJob(%s) : invalid role" % jobId)
        return Protocol.Response(Protocol.SC_Role).encode()
    # invalid token
    if not validToken:
        _logger.warning("updateJob(%s) : invalid token" % jobId)
        return Protocol.Response(Protocol.SC_Invalid).encode()
    # aborting message: 'NULL' jobId is acknowledged without any update
    if jobId=='NULL':
        return Protocol.Response(Protocol.SC_Success).encode()
    # check status; unknown states are acknowledged but not applied
    if not state in ['running','failed','finished','holding','starting','transferring']:
        _logger.warning("invalid state=%s for updateJob" % state)
        return Protocol.Response(Protocol.SC_Success).encode()
    # pilot log: best-effort upload to the PanDA HTTP logger
    if pilotLog != '':
        try:
            # make message
            message = pilotLog
            # get logger
            _pandaLogger = PandaLogger()
            _pandaLogger.lock()
            _pandaLogger.setParams({'Type':'pilotLog','PandaID':int(jobId)})
            logger = _pandaLogger.getHttpLogger(panda_config.loggername)
            # add message
            logger.info(message)
            # release HTTP handler
            _pandaLogger.release()
        except:
            # NOTE(review): bare except keeps the update alive if the upload
            # fails, but the logger lock is not released on error here
            pass
    # create parameter map of only the supplied (non-None) values
    param = {}
    if cpuConsumptionTime != None:
        param['cpuConsumptionTime']=cpuConsumptionTime
    if cpuConsumptionUnit != None:
        param['cpuConsumptionUnit']=cpuConsumptionUnit
    if node != None:
        # truncated to the DB column width
        param['modificationHost']=node[:128]
    if transExitCode != None:
        param['transExitCode']=transExitCode
    if pilotErrorCode != None:
        param['pilotErrorCode']=pilotErrorCode
    if pilotErrorDiag != None:
        param['pilotErrorDiag']=pilotErrorDiag[:500]
    if jobMetrics != None:
        param['jobMetrics']=jobMetrics[:500]
    if schedulerID != None:
        param['schedulerID']=schedulerID
    if pilotID != None:
        param['pilotID']=pilotID[:200]
    if batchID != None:
        param['batchID']=batchID
    if exeErrorCode != None:
        param['exeErrorCode']=exeErrorCode
    if exeErrorDiag != None:
        param['exeErrorDiag']=exeErrorDiag[:500]
    if cpuConversionFactor != None:
        param['cpuConversion']=cpuConversionFactor
    if pilotTiming != None:
        param['pilotTiming']=pilotTiming
    if computingElement != None:
        param['computingElement']=computingElement
    if nEvents != None:
        param['nEvents']=nEvents
    if nInputFiles != None:
        param['nInputFiles']=nInputFiles
    # empty string means "not provided" for these two
    if not jobSubStatus in [None,'']:
        param['jobSubStatus']=jobSubStatus
    if not coreCount in [None,'']:
        param['actualCoreCount']=coreCount
    # timestamps arrive as '%Y-%m-%d %H:%M:%S'; unparsable values are ignored
    if startTime != None:
        try:
            param['startTime']=datetime.datetime(*time.strptime(startTime,'%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    if endTime != None:
        try:
            param['endTime']=datetime.datetime(*time.strptime(endTime,'%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    # attempt number must be an int; drop it otherwise
    if attemptNr != None:
        try:
            attemptNr = int(attemptNr)
        except:
            attemptNr = None
    if stdout != '':
        stdout = stdout[:2048]
    # invoke JD
    return jobDispatcher.updateJob(int(jobId),state,int(timeout),xml,siteName,
                                   param,metaData,attemptNr,stdout)
Example #4
0
from pandalogger.PandaLogger import PandaLogger

# Smoke test: lock a PandaLogger, tag messages with the 'retryModule' type,
# and emit a single debug line through the 'dev' HTTP logger.
smoke_logger = PandaLogger()
smoke_logger.lock()
smoke_logger.setParams({'Type': 'retryModule'})
http_logger = smoke_logger.getHttpLogger('dev')
http_logger.debug("This is only a test")
def _getPFNFromLFC(lfns,
                   dq2url,
                   guids,
                   storageName,
                   scopeList=None,
                   tmpLog=None):
    """
    Resolve physical file names via the external LFCclient.py helper.

    LFNs (plus GUIDs and optional scopes) are written in chunks of 1000 to a
    scratch file and passed to LFCclient.py in a cleaned shell environment.

    :param lfns: list of logical file names
    :param dq2url: LFC endpoint passed to LFCclient.py
    :param guids: GUIDs, parallel to lfns
    :param storageName: list of storage names (joined with commas)
    :param scopeList: optional scopes, parallel to lfns
    :param tmpLog: logger; a fresh LogWrapper is created when None
    :return: concatenated LFCclient output on success, '' for empty/invalid
             input, or the integer shell exit status on failure
             (NOTE(review): mixed str/int return kept for backward
             compatibility with existing callers)
    """
    # avoid the shared mutable-default-argument pitfall; None behaves like []
    if scopeList is None:
        scopeList = []
    if tmpLog is None:
        tmpLog = LogWrapper(_log, logPrefix)
    tmpLog.debug('_getPFNFromLFC %s %s / %s LFNs:%s %s' %
                 (dq2url, str(storageName), len(lfns), str(
                     lfns[:3]), str(scopeList[:3])))
    outStr = ''
    # check parameters
    if guids == [] or storageName == [] or (len(lfns) != len(guids)):
        tmpLog.debug('_getPFNFromLFC done with empty list')
        return outStr
    # scopeList, when given, must be parallel to lfns
    if scopeList != [] and len(lfns) != len(scopeList):
        tmpLog.warning('_getPFNFromLFC wrong scopeList %s %s %s %s' %
                       (dq2url, str(storageName), str(lfns), str(scopeList)))
        tmpLog.error('_getPFNFromLFC failed')
        return outStr
    # loop over all LFNs, flushing every nLFN entries
    nLFN = 1000
    strFiles = ''
    outStr = ''
    for iLFN in range(len(lfns)):
        if scopeList != []:
            strFiles += '%s %s %s\n' % (lfns[iLFN], guids[iLFN],
                                        scopeList[iLFN])
        else:
            strFiles += '%s %s\n' % (lfns[iLFN], guids[iLFN])
        # bulk operation
        if (iLFN + 1) % nLFN == 0 or (iLFN + 1) >= len(lfns):
            # write the chunk to a scratch file for LFCclient.py
            inFileName = '%s/lfcin.%s' % (panda_config.logdir,
                                          commands.getoutput('uuidgen'))
            ifile = open(inFileName, 'w')
            try:
                ifile.write(strFiles)
            finally:
                # close even if the write fails
                ifile.close()
            # construct command with a cleaned environment
            strStorage = ','.join(storageName)
            com = 'cd %s > /dev/null 2>&1; export HOME=%s; ' % (
                panda_config.home_dir_cwd, panda_config.home_dir_cwd)
            com += 'unset LD_LIBRARY_PATH; unset PYTHONPATH; export PATH=/usr/local/bin:/bin:/usr/bin; '
            com += 'source %s; %s/python -Wignore %s/LFCclient.py -f %s -l %s -s %s' % \
                   (panda_config.glite_source, panda_config.native_python32,
                    panda_config.lfcClient_dir, inFileName, dq2url, strStorage)
            tmpLog.debug(com)
            # execute
            status, output = commands.getstatusoutput(com)
            tmpLog.debug(status)
            if status == 0:
                outStr += output
            else:
                tmpLog.error("_getPFNFromLFC : %s %s %s" %
                             (dq2url, status, output))
                # best-effort upload of the failure to the PanDA HTTP logger
                _pandaLogger = None
                try:
                    # make message
                    message = 'LFC access : %s %s %s' % (dq2url, status,
                                                         output)
                    # get logger
                    _pandaLogger = PandaLogger()
                    _pandaLogger.lock()
                    _pandaLogger.setParams({'Type': 'broker_util'})
                    logger = _pandaLogger.getHttpLogger(
                        panda_config.loggername)
                    # add message
                    logger.error(message)
                except Exception:
                    pass
                finally:
                    # release HTTP handler even when sending failed
                    try:
                        if _pandaLogger is not None:
                            _pandaLogger.release()
                    except Exception:
                        pass
                tmpLog.error('_getPFNFromLFC failed')
                return status
            # reset
            strFiles = ''
    tmpLog.debug('_getPFNFromLFC done')
    # return
    return outStr
def _getPFNFromLFC(lfns,dq2url,guids,storageName,scopeList=[],tmpLog=None):
    """
    Resolve physical file names via the external LFCclient.py helper
    (legacy variant).

    LFNs (plus GUIDs and optional scopes) are written in chunks of 1000 to a
    scratch file and handed to LFCclient.py in a cleaned shell environment.
    Returns the concatenated LFCclient output on success, '' for
    empty/invalid input, or the integer shell exit status on failure.

    NOTE(review): this is a second definition of _getPFNFromLFC in this file;
    if both live in the same module the later one wins — confirm these are
    separate snippets. Also note the mutable default argument scopeList=[]
    (read-only here, but a known pitfall).
    """
    if tmpLog == None:
        tmpLog = LogWrapper(_log,logPrefix)
    tmpLog.debug('_getPFNFromLFC %s %s / %s LFNs:%s %s' % (dq2url,str(storageName),
                                                         len(lfns),str(lfns[:3]),str(scopeList[:3])))
    outStr = ''
    # check paramter
    if guids == [] or storageName == [] or (len(lfns) != len(guids)):
        tmpLog.debug('_getPFNFromLFC done with empty list')
        return outStr
    # check scopeList: when given, it must be parallel to lfns
    if not scopeList in [None,[]] and len(lfns) != len(scopeList):
        tmpLog.warning('_getPFNFromLFC wrong scopeList %s %s %s %s' % (dq2url,str(storageName),
                                                                       str(lfns),str(scopeList)))
        tmpLog.error('_getPFNFromLFC failed')
        return outStr
    # loop over all LFNs, flushing every nLFN entries
    iLFN = 0
    nLFN = 1000
    strFiles = ''
    outStr = ''
    for iLFN in range(len(lfns)):
        if scopeList != []:
            strFiles  += '%s %s %s\n' % (lfns[iLFN],guids[iLFN],scopeList[iLFN])
        else:
            strFiles  += '%s %s\n' % (lfns[iLFN],guids[iLFN])
        # bulk operation
        if (iLFN+1) % nLFN == 0 or (iLFN+1) >= len(lfns):
            # write the chunk to a scratch file for LFCclient.py
            inFileName = '%s/lfcin.%s'  % (panda_config.logdir,commands.getoutput('uuidgen'))
            ifile = open(inFileName,'w')
            ifile.write(strFiles)
            ifile.close()
            # construct commands with a cleaned shell environment
            strStorage = ''
            for storage in storageName:
                strStorage += '%s,' % storage
            strStorage = strStorage[:-1]
            com = 'cd %s > /dev/null 2>&1; export HOME=%s; ' % (panda_config.home_dir_cwd,panda_config.home_dir_cwd)
            com+= 'unset LD_LIBRARY_PATH; unset PYTHONPATH; export PATH=/usr/local/bin:/bin:/usr/bin; '
            com+= 'source %s; %s/python -Wignore %s/LFCclient.py -f %s -l %s -s %s' % \
                  (panda_config.glite_source,panda_config.native_python32,panda_config.lfcClient_dir,
                   inFileName,dq2url,strStorage)
            tmpLog.debug(com)
            # exeute
            status,output = commands.getstatusoutput(com)
            tmpLog.debug(status)
            if status == 0:
                outStr += output
            else:
                tmpLog.error("_getPFNFromLFC : %s %s %s" % (dq2url,status,output))
                # send message to logger (best effort; errors are swallowed)
                try:
                    # make message
                    message = 'LFC access : %s %s %s' % (dq2url,status,output)
                    # get logger
                    _pandaLogger = PandaLogger()
                    _pandaLogger.lock()
                    _pandaLogger.setParams({'Type':'broker_util'})
                    logger = _pandaLogger.getHttpLogger(panda_config.loggername)
                    # add message
                    logger.error(message)
                    # release HTTP handler
                    _pandaLogger.release()
                except:
                    pass
                tmpLog.error('_getPFNFromLFC failed')
                # NOTE(review): returns the int exit status here, a string
                # elsewhere — callers must handle both
                return status
            # reset
            strFiles = ''
    tmpLog.debug('_getPFNFromLFC done')
    # return
    return outStr
Example #7
0
from pandalogger.PandaLogger import PandaLogger

# Smoke test: lock a PandaLogger, tag messages with the 'retryModule' type,
# and send one debug line through the 'dev' HTTP logger.
check_logger = PandaLogger()
check_logger.lock()
check_logger.setParams({'Type':'retryModule'})
dev_logger = check_logger.getHttpLogger('dev')
dev_logger.debug("This is only a test")
_loggerMap = {}
pandaLogger = PandaLogger()

# Forward an Apache-style access log (read line by line from stdin) to the
# PanDA HTTP logger, typed by the request name.
while True:
    # read line
    line = raw_input()
    # extract host, quoted request, and response code
    matches = re.findall('(\S+) - - \[[^\]]+\] ("[^"]+") (\d+)', line)
    if len(matches) != 1:
        continue
    host, rawRequest, response = matches[0]
    # request name is the last path component of the request URL
    request = rawRequest.split()[1].split('/')[-1]
    # somehow isAlive is not recorded
    if request == 'isAlive':
        request = 'IsAlive'
    # group all datasetCompleted* requests under one log type
    logtype = 'datasetCompleted' if request.startswith('datasetCompleted') else request
    # make message and ship it
    message = '%s - %s %s' % (host, request, response)
    pandaLogger.setParam('Type', logtype)
    logger = pandaLogger.getHttpLogger('prod')
    logger.info(message)