Example #1
def getSiteMapper(self):
    # refresh the cached SiteMapper when it is older than 10 minutes
    timeNow = datetime.datetime.utcnow()
    if timeNow - self.dateTimeForSM > datetime.timedelta(minutes=10):
        self.siteMapper = SiteMapper(self)
        self.dateTimeForSM = timeNow
    return self.siteMapper
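
Example #1 rebuilds the cached SiteMapper at most once every ten minutes and otherwise returns the existing snapshot. A minimal, self-contained sketch of the same refresh-on-read pattern (generic names, not from the PanDA code):

import datetime

class CachedFactory:
    """Rebuild a cached object once it is older than max_age."""
    def __init__(self, factory, max_age_minutes=10):
        self.factory = factory
        self.max_age = datetime.timedelta(minutes=max_age_minutes)
        self.obj = factory()
        self.built_at = datetime.datetime.utcnow()

    def get(self):
        now = datetime.datetime.utcnow()
        if now - self.built_at > self.max_age:
            # stale: rebuild and remember when we did
            self.obj = self.factory()
            self.built_at = now
        return self.obj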
Example #2
def __init__(self, conn):
    CommandReceiveInterface.__init__(self, conn)
    TaskBuffer.TaskBuffer.__init__(self)
    TaskBuffer.TaskBuffer.init(self,
                               jedi_config.db.dbhost,
                               jedi_config.db.dbpasswd,
                               nDBConnection=1)
    # site mapper
    self.siteMapper = SiteMapper(self)
    # update time for site mapper
    self.dateTimeForSM = datetime.datetime.utcnow()
    logger.debug('__init__')
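
This constructor seeds the two attributes, self.siteMapper and self.dateTimeForSM, that getSiteMapper in Example #1 (apparently from the same class) later refreshes. Running the snippet standalone would need a jedi_config object with a db section; a hypothetical stub, for illustration only:

class _DBConfig:
    # placeholder values, not real credentials
    dbhost = 'DB_HOST'
    dbpasswd = 'DB_PASSWORD'

class jedi_config:
    db = _DBConfig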
Example #3
        startTime = datetime.datetime(
            *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            tmpLog.debug("old process : %s %s" % (pid, startTime))
            tmpLog.debug(line)
            commands_get_status_output('kill -9 %s' % pid)
except Exception:
    errType, errValue, _ = sys.exc_info()
    tmpLog.error("kill process : %s %s" % (errType, errValue))

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
aSiteMapper = SiteMapper(taskBuffer)

# delete
tmpLog.debug("Del session")
status, retSel = taskBuffer.querySQLS(
    "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {})
if retSel is not None:
    try:
        maxID = retSel[0][0]
        tmpLog.debug("maxID : %s" % maxID)
        if maxID is not None:
            varMap = {}
            varMap[':maxID'] = maxID
            varMap[':jobStatus1'] = 'activated'
            varMap[':jobStatus2'] = 'waiting'
            varMap[':jobStatus3'] = 'failed'
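
The first half of this example sweeps away long-running processes: it parses a start time out of ps output and sends kill -9 to anything older than a limit. A self-contained sketch of that sweep (the ps invocation, time format, and two-hour cutoff are assumptions, not from the original):

import datetime
import subprocess
import time

# anything started more than two hours ago counts as stale here
timeLimit = datetime.datetime.now() - datetime.timedelta(hours=2)
for line in subprocess.getoutput('ps -eo pid,lstart').splitlines()[1:]:
    pid, _, lstart = line.strip().partition(' ')
    # lstart is local time, e.g. 'Wed Jun  1 12:00:00 2022'
    startTime = datetime.datetime(
        *time.strptime(lstart.strip(), '%a %b %d %H:%M:%S %Y')[:6])
    if startTime < timeLimit:
        subprocess.getoutput('kill -9 %s' % pid)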
Example #4
def main(backGround=False):
    _logger.debug('starting ...')
    # register signal handler
    signal.signal(signal.SIGINT, catch_sig)
    signal.signal(signal.SIGHUP, catch_sig)
    signal.signal(signal.SIGTERM, catch_sig)
    signal.signal(signal.SIGALRM, catch_sig)
    signal.alarm(overallTimeout)
    # forking
    pid = os.fork()
    if pid != 0:
        # watch child process
        os.wait()
        time.sleep(1)
    else:
        # main loop
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        # check certificate
        certName = '%s/pandasv1_usercert.pem' % panda_config.certdir
        keyName = '%s/pandasv1_userkey.pem' % panda_config.certdir

        _logger.debug('checking certificate {0}'.format(certName))
        certOK, certMsg = DataServiceUtils.checkCertificate(certName)
        if not certOK:
            _logger.error('bad certificate : {0}'.format(certMsg))
        # initialize cx_Oracle using dummy connection
        from pandaserver.taskbuffer.Initializer import initializer
        initializer.init()
        # instantiate TB
        taskBuffer.init(panda_config.dbhost,
                        panda_config.dbpasswd,
                        nDBConnection=1)
        # instantiate sitemapper
        siteMapper = SiteMapper(taskBuffer)
        # ActiveMQ params
        queue = '/queue/Consumer.PANDA.atlas.ddm.siteservices'
        ssl_opts = {
            'use_ssl': True,
            'ssl_version': ssl.PROTOCOL_TLSv1,
            'ssl_cert_file': certName,
            'ssl_key_file': keyName
        }
        # resolve multiple brokers
        brokerList = socket.gethostbyname_ex('atlas-mb.cern.ch')[-1]
        # set listener
        connList = []
        for tmpBroker in brokerList:
            try:
                clientid = 'PANDA-' + socket.getfqdn() + '-' + tmpBroker
                subscription_id = 'panda-server-consumer-' + socket.getfqdn()
                _logger.debug('setting listener %s' % clientid)
                conn = stomp.Connection(host_and_ports=[(tmpBroker, 61023)],
                                        **ssl_opts)
                # keep the per-broker ids together with the connection
                connList.append((conn, clientid, subscription_id))
            except Exception:
                errtype, errvalue = sys.exc_info()[:2]
                _logger.error("failed to connect to %s : %s %s" %
                              (tmpBroker, errtype, errvalue))
                catch_sig(None, None)
        while True:
            for conn, clientid, subscription_id in connList:
                try:
                    if not conn.is_connected():
                        conn.set_listener(
                            'FileCallbackListener',
                            FileCallbackListener(conn, taskBuffer, siteMapper,
                                                 subscription_id))
                        conn.start()
                        conn.connect(headers={'client-id': clientid})
                        conn.subscribe(destination=queue,
                                       id=subscription_id,
                                       ack='client-individual')
                        _logger.debug('listener %s is up and running' %
                                      clientid)
                except Exception:
                    errtype, errvalue = sys.exc_info()[:2]
                    _logger.error("failed to set listener %s : %s %s" %
                                  (clientid, errtype, errvalue))
                    catch_sig(None, None)
            time.sleep(5)
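
FileCallbackListener itself is not shown here. With stomp.py 4.x (implied by conn.start() above), a listener subclasses stomp.ConnectionListener and acknowledges each message individually, since the subscription uses ack='client-individual'. A minimal sketch, with the actual processing left out:

import stomp

class MinimalListener(stomp.ConnectionListener):
    """Hypothetical stand-in for FileCallbackListener."""
    def __init__(self, conn):
        self.conn = conn

    def on_message(self, headers, body):
        # handle the message, then ack so the broker can discard it
        print('received: %s' % body)
        self.conn.ack(headers['message-id'], headers['subscription'])

    def on_disconnected(self):
        # the surrounding while loop above re-establishes dropped connections
        print('disconnected')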
Example #5
# password
from pandaserver.config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('prioryMassage')
tmpLog = LogWrapper(_logger)

tmpLog.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# get usage breakdown
usageBreakDownPerUser = {}
usageBreakDownPerSite = {}
workingGroupList = []
for table in ['ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsArchived4']:
    varMap = {}
    varMap[':prodSourceLabel'] = 'user'
    varMap[':pmerge'] = 'pmerge'
    if table == 'ATLAS_PANDA.jobsActive4':
        sql = ("SELECT COUNT(*),prodUserName,jobStatus,workingGroup,computingSite "
               "FROM %s WHERE prodSourceLabel=:prodSourceLabel "
               "AND processingType<>:pmerge "
               "GROUP BY prodUserName,jobStatus,workingGroup,computingSite" % table)
    else:
        # with time range for archived table
        varMap[':modificationTime'] = \
            datetime.datetime.utcnow() - datetime.timedelta(minutes=60)
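
Each row returned by the GROUP BY above is a (count, prodUserName, jobStatus, workingGroup, computingSite) tuple. A sketch of folding those rows into the two breakdown dicts (the exact nesting used by the real script is an assumption):

status, res = taskBuffer.querySQLS(sql, varMap)
for cnt, prodUserName, jobStatus, workingGroup, computingSite in res or []:
    # per-user: user -> workingGroup -> jobStatus -> count
    userDict = usageBreakDownPerUser.setdefault(prodUserName, {})
    userDict.setdefault(workingGroup, {}).setdefault(jobStatus, 0)
    userDict[workingGroup][jobStatus] += cnt
    # per-site: site -> jobStatus -> count
    siteDict = usageBreakDownPerSite.setdefault(computingSite, {})
    siteDict.setdefault(jobStatus, 0)
    siteDict[jobStatus] += cnt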
Example #6
def main(argv=tuple(), tbuf=None, **kwargs):

    # Python 2/3 compatibility shim: Python 3 has no built-in long
    try:
        long
    except NameError:
        long = int

    prelock_pid = GenericThread().get_pid()
    tmpLog = LogWrapper(_logger, "<pid={}>".format(prelock_pid))

    tmpLog.debug("===================== start =====================")

    # return value, true to run main again in next daemon loop
    ret_val = True

    # grace period
    try:
        gracePeriod = int(argv[1])
    except Exception:
        gracePeriod = 1

    # lock interval in minutes
    lock_interval = 10

    # retry interval in minutes
    retry_interval = 3

    # instantiate TB
    if tbuf is None:
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost,
                        panda_config.dbpasswd,
                        nDBConnection=1)
    else:
        taskBuffer = tbuf

    # instantiate sitemapper
    aSiteMapper = SiteMapper(taskBuffer)

    # thread for adder
    class AdderThread(GenericThread):
        def __init__(self, taskBuffer, aSiteMapper, job_output_reports):
            GenericThread.__init__(self)
            self.taskBuffer = taskBuffer
            self.aSiteMapper = aSiteMapper
            self.job_output_reports = job_output_reports

        # main loop
        def run(self):
            # initialize
            taskBuffer = self.taskBuffer
            aSiteMapper = self.aSiteMapper
            # get file list
            timeNow = datetime.datetime.utcnow()
            timeInt = datetime.datetime.utcnow()
            # unique pid
            GenericThread.__init__(self)
            uniq_pid = self.get_pid()
            # log pid
            tmpLog.debug("pid={0} : run".format(uniq_pid))
            # stats
            n_processed = 0
            # loop
            while True:
                # get report
                one_jor = self.job_output_reports.pop()
                if not one_jor:
                    break
                # lock
                panda_id, job_status, attempt_nr, time_stamp = one_jor
                got_lock = taskBuffer.lockJobOutputReport(
                    panda_id=panda_id,
                    attempt_nr=attempt_nr,
                    pid=uniq_pid,
                    time_limit=lock_interval)
                if not got_lock:
                    continue
                # add
                try:
                    modTime = time_stamp
                    if (timeNow - modTime) > datetime.timedelta(hours=24):
                        # last add
                        tmpLog.debug(
                            "pid={0} : last add job={1}.{2} st={3}".format(
                                uniq_pid, panda_id, attempt_nr, job_status))
                        ignoreTmpError = False
                    else:
                        # usual add
                        tmpLog.debug("pid={0} : add job={1}.{2} st={3}".format(
                            uniq_pid, panda_id, attempt_nr, job_status))
                        ignoreTmpError = True
                    # get adder
                    adder_gen = AdderGen(taskBuffer,
                                         panda_id,
                                         job_status,
                                         attempt_nr,
                                         ignoreTmpError=ignoreTmpError,
                                         siteMapper=aSiteMapper,
                                         pid=uniq_pid,
                                         prelock_pid=uniq_pid,
                                         lock_offset=lock_interval -
                                         retry_interval)
                    n_processed += 1
                    # execute
                    adder_gen.run()
                    del adder_gen
                except Exception as e:
                    tmpLog.error("pid={} : failed to run with {} {}".format(
                        uniq_pid, str(e), traceback.format_exc()))
            # stats
            tmpLog.debug("pid={} : processed {}".format(uniq_pid, n_processed))

        # launcher, run with multiprocessing
        def proc_launch(self):
            # run
            self.process = multiprocessing.Process(target=self.run)
            self.process.start()

        # join of multiprocessing
        def proc_join(self):
            self.process.join()

    # TaskBuffer with more connections behind TaskBufferInterface
    tmpLog.debug("setup taskBufferIF")
    n_connections = 4
    _tbuf = TaskBuffer()
    _tbuf.init(panda_config.dbhost,
               panda_config.dbpasswd,
               nDBConnection=n_connections)
    taskBufferIF = TaskBufferInterface()
    taskBufferIF.launch(_tbuf)

    # add files
    tmpLog.debug("run Adder")

    interval = 10
    nLoop = 10
    for iLoop in range(nLoop):
        tmpLog.debug('start iLoop={}/{}'.format(iLoop, nLoop))
        start_time = datetime.datetime.utcnow()
        adderThrList = []
        nThr = 10

        n_jors_per_batch = 1000

        jor_lists = WeightedLists(multiprocessing.Lock())

        # get some job output reports
        jor_list_others = taskBuffer.listJobOutputReport(
            only_unlocked=True,
            time_limit=lock_interval,
            limit=n_jors_per_batch * nThr,
            grace_period=gracePeriod,
            anti_labels=['user'])
        jor_lists.add(3, jor_list_others)
        jor_list_user = taskBuffer.listJobOutputReport(
            only_unlocked=True,
            time_limit=lock_interval,
            limit=n_jors_per_batch * nThr,
            grace_period=gracePeriod,
            labels=['user'])
        jor_lists.add(7, jor_list_user)

        # adder consumer processes
        _n_thr_with_tbuf = 0
        tbuf_list = []
        tmpLog.debug("got {} job reports".format(len(jor_lists)))
        for i in range(nThr):
            if i < _n_thr_with_tbuf:
                tbuf = TaskBuffer()
                tbuf_list.append(tbuf)
                tbuf.init(panda_config.dbhost,
                          panda_config.dbpasswd,
                          nDBConnection=1)
                thr = AdderThread(tbuf, aSiteMapper, jor_lists)
            else:
                thr = AdderThread(taskBufferIF.getInterface(), aSiteMapper,
                                  jor_lists)
            adderThrList.append(thr)
        # start all threads
        for thr in adderThrList:
            # thr.start()
            thr.proc_launch()
            time.sleep(0.25)

        # join all threads
        for thr in adderThrList:
            # thr.join()
            thr.proc_join()
        for tbuf in tbuf_list:
            tbuf.cleanup()
        end_time = datetime.datetime.utcnow()
        sleep_time = interval - (end_time - start_time).seconds
        if sleep_time > 0 and iLoop + 1 < nLoop:
            sleep_time = random.randint(1, sleep_time)
            tmpLog.debug("sleep {} sec".format(sleep_time))
            time.sleep(sleep_time)

    # stop TaskBuffer IF
    taskBufferIF.stop()

    tmpLog.debug("===================== end =====================")

    # return
    return ret_val
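
The WeightedLists helper is not shown in this example. From its use above, add(weight, list), len(), and a pop() that returns a falsy value once drained, all guarded by a multiprocessing lock, a single-process sketch of the weighted draw could look like this; a real implementation would additionally have to share its state across the consumer processes:

import random

class WeightedLists:
    """Hypothetical minimal version: pop() draws from one of the
    registered lists with probability proportional to its weight."""
    def __init__(self, lock):
        self.lock = lock
        self.lists = []  # (weight, items) pairs

    def add(self, weight, items):
        with self.lock:
            self.lists.append((weight, list(items)))

    def __len__(self):
        with self.lock:
            return sum(len(items) for _, items in self.lists)

    def pop(self):
        with self.lock:
            candidates = [(w, items) for w, items in self.lists if items]
            if not candidates:
                return None
            weights = [w for w, _ in candidates]
            # random.choices needs Python 3.6+
            _, chosen = random.choices(candidates, weights=weights)[0]
            return chosen.pop(0)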
Example #7
except ImportError:
    from http.client import HTTPSConnection

import pandaserver.userinterface.Client as Client
from pandaserver.userinterface.Client import baseURLSSL

from pandaserver.taskbuffer.TaskBuffer import taskBuffer
from pandaserver.brokerage.SiteMapper import SiteMapper
from pandaserver.config import panda_config
from pandaserver.dataservice import DataServiceUtils
from pandaserver.dataservice.DataServiceUtils import select_scope

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

jobID = sys.argv[1]
s, o = Client.getJobStatus([jobID])

if s != 0:
    print("failed to get job status with: %s" % s)
    sys.exit(0)

job = o[0]

if job is None:
    print("got None")
    sys.exit(0)

xml = """<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
Example #8
 def run(self):
     # start
     try:
         byCallback = False
         if self.job is None:
             byCallback = True
             _logger.debug("start: %s" % self.dataset.name)
             _logger.debug("callback from %s" % self.site)
             # FIXME: remove this workaround once callbacks from BNLPANDA disappear
             if self.site == 'BNLPANDA':
                 self.site = 'BNL-OSG2_ATLASMCDISK'
             # instantiate site mapper
             siteMapper = SiteMapper(self.taskBuffer)
             # get computingSite/destinationSE
             computingSite, destinationSE = self.taskBuffer.getDestSE(
                 self.dataset.name)
             if destinationSE is None:
                 # try to get computingSite/destinationSE from ARCH to delete sub
                 # even if no active jobs left
                 computingSite, destinationSE = self.taskBuffer.getDestSE(
                     self.dataset.name, True)
                 if destinationSE is None:
                     _logger.error("cannot get source/destination for %s" %
                                   self.dataset.name)
                     _logger.debug("end: %s" % self.dataset.name)
                     return
             _logger.debug("src: %s" % computingSite)
             _logger.debug("dst: %s" % destinationSE)
             # get corresponding token
             tmpSrcSiteSpec = siteMapper.getSite(computingSite)
             tmpDstSiteSpec = siteMapper.getSite(destinationSE)
             _logger.debug(tmpDstSiteSpec.setokens_output)
             destToken = None
             for scope in tmpDstSiteSpec.setokens_output:
                 for setoken in tmpDstSiteSpec.setokens_output[scope]:
                     for tmpDdmId in tmpDstSiteSpec.setokens_output[scope][
                             setoken]:
                         if self.site == tmpDdmId:
                             destToken = setoken
                             break
             _logger.debug("use Token=%s" % destToken)
             # get required tokens
             reqTokens = self.taskBuffer.getDestTokens(self.dataset.name)
             if reqTokens is None:
                 _logger.error("cannot get required token for %s" %
                               self.dataset.name)
                 _logger.debug("end: %s" % self.dataset.name)
                 return
             _logger.debug("req Token=%s" % reqTokens)
             # make bitmap for the token
             bitMap = 1
             if len(reqTokens.split(',')) > 1:
                 for tmpReqToken in reqTokens.split(','):
                     if tmpReqToken == destToken:
                         break
                     # shift one bit
                     bitMap <<= 1
             # completed bitmap
             compBitMap = (1 << len(reqTokens.split(','))) - 1
             # ignore the lowest bit for T1, file on DISK is already there
             # TODO: #prodanaly use the scope, but don't know job information
             if tmpSrcSiteSpec.ddm_output == tmpDstSiteSpec.ddm_output:
                 compBitMap = compBitMap & 0xFFFE
             # update bitmap in DB
             updatedBitMap = self.taskBuffer.updateTransferStatus(
                 self.dataset.name, bitMap)
             _logger.debug(
                 "transfer status:%s - comp:%s - bit:%s" %
                 (hex(updatedBitMap), hex(compBitMap), hex(bitMap)))
             # update output files
             if (updatedBitMap & compBitMap) == compBitMap:
                 ids = self.taskBuffer.updateOutFilesReturnPandaIDs(
                     self.dataset.name)
                 # set flag for T2 cleanup
                 self.dataset.status = 'cleanup'
                 self.taskBuffer.updateDatasets([self.dataset])
             else:
                 _logger.debug("end: %s" % self.dataset.name)
                 return
         else:
             _logger.debug("start: %s" % self.job.PandaID)
             # update input files
             ids = [self.job.PandaID]
         _logger.debug("IDs: %s" % ids)
         if len(ids) != 0:
             # get job
             if self.job is None:
                 jobs = self.taskBuffer.peekJobs(ids,
                                                 fromDefined=False,
                                                 fromArchived=False,
                                                 fromWaiting=False)
             else:
                 jobs = [self.job]
             # loop over all jobs
             for job in jobs:
                 if job is None:
                     continue
                 _logger.debug("Job: %s" % job.PandaID)
                 if job.jobStatus == 'transferring':
                     jobReady = True
                     failedFiles = []
                     noOutFiles = []
                     # check file status
                     for file in job.Files:
                         if file.type == 'output' or file.type == 'log':
                             if file.status == 'failed':
                                 failedFiles.append(file.lfn)
                             elif file.status == 'nooutput':
                                 noOutFiles.append(file.lfn)
                             elif file.status != 'ready':
                                 _logger.debug(
                                     "Job: %s file:%s %s != ready" %
                                     (job.PandaID, file.lfn, file.status))
                                 jobReady = False
                                 break
                     # finish job
                     if jobReady:
                         if byCallback:
                             _logger.debug("Job: %s all files ready" %
                                           job.PandaID)
                         else:
                             _logger.debug(
                                 "Job: %s all files checked with catalog" %
                                 job.PandaID)
                         # create XML
                         try:
                             import xml.dom.minidom
                             dom = xml.dom.minidom.getDOMImplementation()
                             doc = dom.createDocument(None, 'xml', None)
                             topNode = doc.createElement("POOLFILECATALOG")
                             for file in job.Files:
                                 if file.type in ['output', 'log']:
                                     # skip failed or no-output files
                                     if file.lfn in failedFiles + noOutFiles:
                                         continue
                                     # File
                                     fileNode = doc.createElement("File")
                                     fileNode.setAttribute("ID", file.GUID)
                                     # LFN
                                     logNode = doc.createElement("logical")
                                     lfnNode = doc.createElement("lfn")
                                     lfnNode.setAttribute('name', file.lfn)
                                     # metadata
                                     fsizeNode = doc.createElement(
                                         "metadata")
                                     fsizeNode.setAttribute(
                                         "att_name", "fsize")
                                     fsizeNode.setAttribute(
                                         "att_value", str(file.fsize))
                                     # checksum
                                     if file.checksum.startswith('ad:'):
                                         # adler32
                                         chksumNode = doc.createElement(
                                             "metadata")
                                         chksumNode.setAttribute(
                                             "att_name", "adler32")
                                         chksumNode.setAttribute(
                                             "att_value",
                                             re.sub('^ad:', '',
                                                    file.checksum))
                                     else:
                                         # md5sum
                                         chksumNode = doc.createElement(
                                             "metadata")
                                         chksumNode.setAttribute(
                                             "att_name", "md5sum")
                                         chksumNode.setAttribute(
                                             "att_value",
                                             re.sub('^md5:', '',
                                                    file.checksum))
                                     # append nodes
                                     logNode.appendChild(lfnNode)
                                     fileNode.appendChild(logNode)
                                     fileNode.appendChild(fsizeNode)
                                     fileNode.appendChild(chksumNode)
                                     topNode.appendChild(fileNode)
                             # status of the job record
                             if failedFiles == []:
                                 record_status = 'finished'
                             else:
                                 record_status = 'failed'
                             # write to file
                             # xmlFile = '%s/%s_%s_%s' % (panda_config.logdir,job.PandaID,record_status,
                             #                            str(uuid.uuid4()))
                             # oXML = open(xmlFile,"w")
                             # oXML.write(topNode.toxml())
                             # oXML.close()
                             # write to job output report table, try update first
                             tmp_ret = self.taskBuffer.updateJobOutputReport(
                                 panda_id=job.PandaID,
                                 attempt_nr=job.attemptNr,
                                 data=topNode.toxml())
                             if not tmp_ret:
                                 # then try insert
                                 self.taskBuffer.insertJobOutputReport(
                                     panda_id=job.PandaID,
                                     prod_source_label=job.prodSourceLabel,
                                     job_status=record_status,
                                     attempt_nr=job.attemptNr,
                                     data=topNode.toxml())
                         except Exception:
                             errType, errValue, _ = sys.exc_info()
                             _logger.error("Job: %s %s %s" %
                                           (job.PandaID, errType, errValue))
                 _logger.debug("Job: %s status: %s" %
                               (job.PandaID, job.jobStatus))
         # end
         if self.job is None:
             _logger.debug("end: %s" % self.dataset.name)
         else:
             _logger.debug("end: %s" % self.job.PandaID)
     except Exception:
         errType, errValue, _ = sys.exc_info()
         _logger.error("run() : %s %s" % (errType, errValue))
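
The token bookkeeping in this example packs per-token transfer completion into a bitmap: each required token owns one bit in the order given by reqTokens, updateTransferStatus ORs the new bit into the stored value, and the dataset is complete once every bit of compBitMap is set. A small worked example of the same arithmetic (the token names are illustrative):

reqTokens = 'DATADISK,SCRATCHDISK,TAPE'
destToken = 'SCRATCHDISK'

# one bit per required token, in list order
bitMap = 1
for tmpReqToken in reqTokens.split(','):
    if tmpReqToken == destToken:
        break
    bitMap <<= 1
# bitMap == 0b010 since SCRATCHDISK is the second token

compBitMap = (1 << len(reqTokens.split(','))) - 1  # 0b111: all tokens done
updatedBitMap = 0b101 | bitMap                     # previous state OR new bit
assert (updatedBitMap & compBitMap) == compBitMap  # 0b111: transfer complete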
Example #9
from pandaserver.taskbuffer import ErrorCode

# password
from pandaserver.config import panda_config

# logger
_logger = PandaLogger().getLogger('esPreemption')
tmpLog = LogWrapper(_logger)

tmpLog.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# time limit
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=15)

# get low priority ES jobs per site
sqlEsJobs = "SELECT PandaID,computingSite,commandToPilot,startTime "
sqlEsJobs += "FROM {0}.jobsActive4 ".format(panda_config.schemaPANDA)
sqlEsJobs += "WHERE prodSourceLabel IN (:label1,:label2) AND eventService=:es "
sqlEsJobs += "AND currentPriority<:prio AND jobStatus=:jobStat "
sqlEsJobs += "ORDER BY currentPriority,PandaID "

varMap = {}
varMap[':label1'] = 'managed'
varMap[':label2'] = 'test'
varMap[':es'] = 1
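
The snippet breaks off before the remaining bind variables are set; the query above still needs :prio and :jobStat before it can run. A sketch of completing the map and scanning the result (the concrete priority cutoff and status value are assumptions):

varMap[':prio'] = 200           # priority cutoff: an assumption
varMap[':jobStat'] = 'running'  # status filter: an assumption
status, res = taskBuffer.querySQLS(sqlEsJobs, varMap)
for pandaID, computingSite, commandToPilot, startTime in res or []:
    # jobs that started before timeLimit are preemption candidates
    if startTime is not None and startTime < timeLimit:
        tmpLog.debug('candidate %s at %s' % (pandaID, computingSite))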
Example #10
def main(argv=tuple(), tbuf=None, **kwargs):

    # Python 2/3 compatibility shim: Python 3 has no built-in long
    try:
        long
    except NameError:
        long = int

    tmpLog = LogWrapper(_logger, None)

    tmpLog.debug("===================== start =====================")

    # current minute
    currentMinute = datetime.datetime.utcnow().minute

    # instantiate TB
    if tbuf is None:
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost,
                        panda_config.dbpasswd,
                        nDBConnection=1)
    else:
        taskBuffer = tbuf

    # instantiate sitemapper
    aSiteMapper = SiteMapper(taskBuffer)

    # delete
    tmpLog.debug("Del session")
    status, retSel = taskBuffer.querySQLS(
        "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {})
    if retSel is not None:
        try:
            maxID = retSel[0][0]
            tmpLog.debug("maxID : %s" % maxID)
            if maxID is not None:
                varMap = {}
                varMap[':maxID'] = maxID
                varMap[':jobStatus1'] = 'activated'
                varMap[':jobStatus2'] = 'waiting'
                varMap[':jobStatus3'] = 'failed'
                varMap[':jobStatus4'] = 'cancelled'
                status, retDel = taskBuffer.querySQLS(
                    "DELETE FROM ATLAS_PANDA.jobsDefined4 WHERE PandaID<:maxID AND jobStatus IN (:jobStatus1,:jobStatus2,:jobStatus3,:jobStatus4)",
                    varMap)
        except Exception:
            pass

    # count # of getJob/updateJob in dispatcher's log
    try:
        # don't update when logrotate is running
        timeNow = datetime.datetime.utcnow()
        logRotateTime = timeNow.replace(hour=3,
                                        minute=2,
                                        second=0,
                                        microsecond=0)
        if (timeNow > logRotateTime and (timeNow-logRotateTime) < datetime.timedelta(minutes=5)) or \
               (logRotateTime > timeNow and (logRotateTime-timeNow) < datetime.timedelta(minutes=5)):
            tmpLog.debug("skip pilotCounts session for logrotate")
        else:
            # log filename
            dispLogName = '%s/panda-PilotRequests.log' % panda_config.logdir
            # time limit
            timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
                hours=3)
            timeLimitS = datetime.datetime.utcnow() - datetime.timedelta(
                hours=1)
            # check if tgz is required
            com = 'head -1 %s' % dispLogName
            lostat, loout = commands_get_status_output(com)
            useLogTgz = True
            if lostat == 0:
                match = re.search(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}',
                                  loout)
                if match is not None:
                    startTime = datetime.datetime(*time.strptime(
                        match.group(0), '%Y-%m-%d %H:%M:%S')[:6])
                    # current log contains all info
                    if startTime < timeLimit:
                        useLogTgz = False
            # log files
            dispLogNameList = [dispLogName]
            if useLogTgz:
                today = datetime.date.today()
                dispLogNameList.append('{0}-{1}.gz'.format(
                    dispLogName, today.strftime('%Y%m%d')))
            # delete tmp
            commands_get_status_output('rm -f %s.tmp-*' % dispLogName)
            # tmp name
            tmpLogName = '%s.tmp-%s' % (
                dispLogName,
                datetime.datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S'))
            # loop over all files
            pilotCounts = {}
            pilotCountsS = {}
            for tmpDispLogName in dispLogNameList:
                # expand or copy
                if tmpDispLogName.endswith('.gz'):
                    com = 'gunzip -c %s > %s' % (tmpDispLogName, tmpLogName)
                else:
                    com = 'cp %s %s' % (tmpDispLogName, tmpLogName)
                lostat, loout = commands_get_status_output(com)
                if lostat != 0:
                    errMsg = 'failed to expand/copy %s with : %s' % (
                        tmpDispLogName, loout)
                    raise RuntimeError(errMsg)
                # search string
                sStr = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*'
                sStr += r'method=(.+),site=(.+),node=(.+),type=(.+)'
                # read
                logFH = open(tmpLogName)
                for line in logFH:
                    # check format
                    match = re.search(sStr, line)
                    if match is not None:
                        # check timerange
                        timeStamp = datetime.datetime(*time.strptime(
                            match.group(1), '%Y-%m-%d %H:%M:%S')[:6])
                        if timeStamp < timeLimit:
                            continue
                        tmpMethod = match.group(2)
                        tmpSite = match.group(3)
                        tmpNode = match.group(4)
                        tmpType = match.group(5)

                        # protection against corrupted entries from pilot,
                        # e.g. pilot reading site json from cvmfs while it was being updated
                        if tmpSite not in aSiteMapper.siteSpecList:
                            continue
                        # sum
                        pilotCounts.setdefault(tmpSite, {})
                        pilotCounts[tmpSite].setdefault(tmpMethod, {})
                        pilotCounts[tmpSite][tmpMethod].setdefault(tmpNode, 0)
                        pilotCounts[tmpSite][tmpMethod][tmpNode] += 1
                        # short
                        if timeStamp > timeLimitS:
                            pilotCountsS.setdefault(tmpSite, {})
                            pilotCountsS[tmpSite].setdefault(tmpMethod, {})
                            pilotCountsS[tmpSite][tmpMethod].setdefault(
                                tmpNode, 0)
                            pilotCountsS[tmpSite][tmpMethod][tmpNode] += 1
                # close
                logFH.close()
            # delete tmp
            commands_get_status_output('rm %s' % tmpLogName)
            # update
            hostID = panda_config.pserverhost.split('.')[0]
            tmpLog.debug("pilotCounts session")
            retPC = taskBuffer.updateSiteData(hostID, pilotCounts, interval=3)
            tmpLog.debug(retPC)
            retPC = taskBuffer.updateSiteData(hostID, pilotCountsS, interval=1)
            tmpLog.debug(retPC)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("updateJob/getJob : %s %s" % (errType, errValue))

    # nRunning
    tmpLog.debug("nRunning session")
    try:
        # integer division: plain / yields a float on Python 3 and the
        # modulo comparison against nrun_snum would silently misbehave
        if (currentMinute // panda_config.nrun_interval
            ) % panda_config.nrun_hosts == panda_config.nrun_snum:
            retNR = taskBuffer.insertnRunningInSiteData()
            tmpLog.debug(retNR)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("nRunning : %s %s" % (errType, errValue))

    # session for co-jumbo jobs
    tmpLog.debug("co-jumbo session")
    try:
        ret = taskBuffer.getCoJumboJobsToBeFinished(30, 0, 1000)
        if ret is None:
            tmpLog.debug("failed to get co-jumbo jobs to finish")
        else:
            coJumboA, coJumboD, coJumboW, coJumboTokill = ret
            tmpLog.debug("finish {0} co-jumbo jobs in Active".format(
                len(coJumboA)))
            if len(coJumboA) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboA,
                                               fromDefined=False,
                                               fromActive=True,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False)
            tmpLog.debug("finish {0} co-jumbo jobs in Defined".format(
                len(coJumboD)))
            if len(coJumboD) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboD,
                                               fromDefined=True,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], True)
            tmpLog.debug("finish {0} co-jumbo jobs in Waiting".format(
                len(coJumboW)))
            if len(coJumboW) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboW,
                                               fromDefined=False,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=True)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False, True)
            tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(
                len(coJumboTokill)))
            if len(coJumboTokill) > 0:
                jediJobs = list(coJumboTokill)
                nJob = 100
                iJob = 0
                while iJob < len(jediJobs):
                    tmpLog.debug(' killing %s' %
                                 str(jediJobs[iJob:iJob + nJob]))
                    Client.killJobs(jediJobs[iJob:iJob + nJob],
                                    51,
                                    keepUnmerged=True)
                    iJob += nJob
    except Exception:
        errStr = traceback.format_exc()
        tmpLog.error(errStr)

    tmpLog.debug("Fork session")

    # thread for fork
    class ForkThr(threading.Thread):
        def __init__(self, fileName):
            threading.Thread.__init__(self)
            self.fileName = fileName

        def run(self):
            if 'VIRTUAL_ENV' in os.environ:
                prefix = os.environ['VIRTUAL_ENV']
            else:
                prefix = ''
            setupStr = 'source {0}/etc/sysconfig/panda_server; '.format(prefix)
            runStr = '%s/python -Wignore ' % panda_config.native_python
            runStr += panda_config.pandaPython_dir + '/dataservice/forkSetupper.py -i '
            runStr += self.fileName
            if self.fileName.split('/')[-1].startswith('set.NULL.'):
                runStr += ' -t'
            comStr = setupStr + runStr
            tmpLog.debug(comStr)
            commands_get_status_output(comStr)

    # get set.* files
    filePatt = panda_config.logdir + '/' + 'set.*'
    fileList = glob.glob(filePatt)

    # the max number of threads
    maxThr = 10
    nThr = 0

    # loop over all files
    forkThrList = []
    timeNow = datetime.datetime.utcnow()
    for tmpName in fileList:
        if not os.path.exists(tmpName):
            continue
        try:
            # takes care of only recent files
            # [:6] keeps year..second; element 6 of struct_time is the
            # weekday and must not be passed as microseconds
            modTime = datetime.datetime(
                *(time.gmtime(os.path.getmtime(tmpName))[:6]))
            if (timeNow - modTime) > datetime.timedelta(minutes=1) and \
                    (timeNow - modTime) < datetime.timedelta(hours=1):
                cSt, cOut = commands_get_status_output(
                    'ps aux | grep fork | grep -v PYTH')
                # if no process is running for the file
                if cSt == 0 and tmpName not in cOut:
                    nThr += 1
                    thr = ForkThr(tmpName)
                    thr.start()
                    forkThrList.append(thr)
                    if nThr > maxThr:
                        break
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            tmpLog.error("%s %s" % (errType, errValue))

    # join fork threads
    for thr in forkThrList:
        thr.join()

    # terminate TaskBuffer IF
    # taskBufferIF.terminate()

    tmpLog.debug("===================== end =====================")
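
The pilot-count parser in this example expects dispatcher log lines carrying a timestamp followed by method=...,site=...,node=...,type=... A tiny standalone check of that regex (the sample line is fabricated for illustration):

import re

sStr = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*'
sStr += r'method=(.+),site=(.+),node=(.+),type=(.+)'

line = ('2024-01-01 12:00:00 DEBUG '
        'method=getJob,site=SOME_SITE,node=worker01,type=production')
match = re.search(sStr, line)
assert match is not None
print(match.group(1))  # 2024-01-01 12:00:00
print(match.group(2))  # getJob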