Example #1
 def __call__(self, *args, **kwargs):
     log = LogWrapper(
         _logger,
         'pid={} thr={} {}'.format(os.getpid(),
                                   threading.current_thread().ident,
                                   self.methodName))
     log.debug('start')
     # get lock among children
     i = self.childlock.get()
     # make a dict to send to the master
     self.commDict[i].update({
         'methodName': self.methodName,
         'args': pickle.dumps(args),
         'kwargs': pickle.dumps(kwargs)
     })
     # send notification to master
     self.comLock[i].release()
     # wait for the response
     self.resLock[i].acquire()
     res = pickle.loads(self.commDict[i]['res'])
     statusCode = self.commDict[i]['stat']
     # release lock to children
     self.childlock.put(i)
     log.debug('end')
     # return
     if statusCode == 0:
         return res
     else:
         errtype, errvalue = res
         raise RuntimeError("{0}: {1} {2}".format(self.methodName,
                                                  errtype.__name__,
                                                  errvalue))
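
This snippet is the child-side half of a lock-based inter-process RPC: the request is pickled into a shared slot, one lock wakes the master, and a second lock wakes the child once the response has been written back. The master loop is not shown in this example; the sketch below is an assumed counterpart (the commDict/comLock/resLock names mirror the child side, and the locks are presumed to start in the acquired state so acquire() blocks until the peer releases):

import pickle
import sys

def master_loop(commDict, comLock, resLock, handlers):
    # hypothetical master-side counterpart serving slot 0 only, for brevity
    while True:
        comLock[0].acquire()                     # block until a child posts a request
        req = dict(commDict[0])                  # copy out of the shared dict
        try:
            func = handlers[req['methodName']]   # look up the requested method
            res = func(*pickle.loads(req['args']), **pickle.loads(req['kwargs']))
            req.update({'res': pickle.dumps(res), 'stat': 0})
        except Exception:
            # mirror the error contract above: (errtype, errvalue) with stat != 0
            req.update({'res': pickle.dumps(sys.exc_info()[:2]), 'stat': 1})
        commDict[0] = req                        # write the response back
        resLock[0].release()                     # wake the waiting child
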
Example #2
 def __init__(self,
              taskBuffer,
              jobID,
              jobStatus,
              attemptNr,
              ignoreTmpError=True,
              siteMapper=None,
              pid=None,
              prelock_pid=None,
              lock_offset=10):
     self.job = None
     self.jobID = jobID
     self.jobStatus = jobStatus
     self.taskBuffer = taskBuffer
     self.ignoreTmpError = ignoreTmpError
     self.lock_offset = lock_offset
     self.siteMapper = siteMapper
     self.datasetMap = {}
     self.extraInfo = {
         'surl': {},
         'nevents': {},
         'lbnr': {},
         'endpoint': {},
         'guid': {}
     }
     self.attemptNr = attemptNr
     self.pid = pid
     self.prelock_pid = prelock_pid
     self.data = None
     # logger
     self.logger = LogWrapper(_logger, str(self.jobID))
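
Nearly every example on this page opens the same way: the module-level logger is wrapped in a LogWrapper whose prefix identifies the object, so each message carries its own context. A minimal sketch (the logger name here is an assumption):

from pandacommon.pandalogger.PandaLogger import PandaLogger
from pandacommon.pandalogger.LogWrapper import LogWrapper

_logger = PandaLogger().getLogger('Adder')        # module-level logger; name assumed
log = LogWrapper(_logger, 'PandaID=4132532996')   # per-object prefix, as in __init__ above
log.debug('start')
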
Example #3
 def run(self):
     # get logger
     tmpLog = LogWrapper(_logger,'<vuid={0} site={1} name={2}>'.format(self.vuid,
                                                                       self.site,
                                                                       self.dataset))
     # query dataset
     tmpLog.debug("start")
     if self.vuid is not None:
         dataset = self.taskBuffer.queryDatasetWithMap({'vuid':self.vuid})
     else:
         dataset = self.taskBuffer.queryDatasetWithMap({'name':self.dataset})
     if dataset is None:
         tmpLog.error("Not found")
         tmpLog.debug("end")
         return
     tmpLog.debug("type:%s name:%s" % (dataset.type,dataset.name))
     if dataset.type == 'dispatch':
         # activate jobs in jobsDefined
         Activator(self.taskBuffer,dataset).start()
     if dataset.type == 'output':
         if dataset.name is not None and re.search(r'^panda\..*_zip$', dataset.name) is not None:
             # start unmerge jobs
             Activator(self.taskBuffer,dataset,enforce=True).start()
         else:
             # finish transferring jobs
             Finisher(self.taskBuffer,dataset,site=self.site).start()
     tmpLog.debug("end")
Example #4
def put_file_recovery_request(req, jediTaskID, dryRun=None):
    if not Protocol.isSecure(req):
        return json.dumps((False, "ERROR : no HTTPS"))
    userName = req.subprocess_env['SSL_CLIENT_S_DN']
    creationTime = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    tmpLog = LogWrapper(_logger, 'put_file_recovery_request <jediTaskID={}>'.format(jediTaskID))
    tmpLog.debug("start user={}".format(userName))
    # get total size
    try:
        jediTaskID = int(jediTaskID)
        # make filename
        evpFileName = '%s/recov.%s' % (panda_config.cache_dir,str(uuid.uuid4()))
        tmpLog.debug("file={}".format(evpFileName))
        # write
        with open(evpFileName, 'w') as fo:
            data = {"userName": userName,
                    "creationTime": creationTime,
                    "jediTaskID": int(jediTaskID)
                    }
            if dryRun:
                data['dryRun'] = True
            json.dump(data, fo)
    except Exception as e:
        errStr = "cannot put request due to {} ".format(str(e))
        tmpLog.error(errStr + traceback.format_exc())
        return json.dumps((False, errStr))
    tmpLog.debug('done')
    return json.dumps((True, 'request was accepted and will be processed in a few minutes'))
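
put_file_recovery_request uses a request-drop convention seen in several of these endpoints: the HTTP handler only writes a small JSON request file into panda_config.cache_dir, and a background daemon picks it up later. A self-contained imitation (a temporary directory stands in for cache_dir):

import datetime
import json
import tempfile
import uuid

cache_dir = tempfile.mkdtemp()   # stands in for panda_config.cache_dir
evpFileName = '%s/recov.%s' % (cache_dir, str(uuid.uuid4()))
with open(evpFileName, 'w') as fo:
    json.dump({'userName': 'Some User',
               'creationTime': datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
               'jediTaskID': 12345}, fo)
# a daemon later globs cache_dir for 'recov.*' files and processes each one
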
Example #5
def main(tbuf=None, **kwargs):
    # logger
    tmpLog = LogWrapper(_logger)

    tmpLog.debug("================= start ==================")
    # instantiate TB
    if tbuf is None:
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)
    else:
        taskBuffer = tbuf

    # instantiate MyProxy I/F
    my_proxy_interface_instance = panda_proxy_cache.MyProxyInterface()

    # roles
    if hasattr(panda_config,'proxy_cache_roles'):
        roles = panda_config.proxy_cache_roles.split(',')
    else:
        roles = ['atlas','atlas:/atlas/Role=production','atlas:/atlas/Role=pilot']
    # get users
    sql = 'select distinct DN FROM ATLAS_PANDAMETA.users WHERE GRIDPREF LIKE :patt'
    varMap = {}
    varMap[':patt'] = '%p%'
    tmpStat,tmpRes = taskBuffer.querySQLS(sql,varMap)
    for realDN, in tmpRes:
        if realDN is None:
            continue
        realDN = CoreUtils.get_bare_dn(realDN, keep_digits=False)
        name = taskBuffer.cleanUserID(realDN)
        # check proxy
        tmpLog.debug("check proxy cache for {}".format(name))
        for role in roles:
            my_proxy_interface_instance.checkProxy(realDN, role=role, name=name)
    tmpLog.debug("done")
Example #6
 def __init__(self,
              taskBuffer,
              jobID,
              jobStatus,
              xmlFile,
              ignoreTmpError=True,
              siteMapper=None):
     self.job = None
     self.jobID = jobID
     self.jobStatus = jobStatus
     self.taskBuffer = taskBuffer
     self.ignoreTmpError = ignoreTmpError
     self.lockXML = None
     self.siteMapper = siteMapper
     self.attemptNr = None
     self.xmlFile = xmlFile
     self.datasetMap = {}
     self.extraInfo = {
         'surl': {},
         'nevents': {},
         'lbnr': {},
         'endpoint': {},
         'guid': {}
     }
     # extract attemptNr
     try:
         tmpAttemptNr = self.xmlFile.split('/')[-1].split('_')[-1]
         if re.search(r'^\d+$', tmpAttemptNr) is not None:
             self.attemptNr = int(tmpAttemptNr)
     except Exception:
         pass
     # logger
     self.logger = LogWrapper(_logger, str(self.jobID))
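
The attemptNr extraction above relies on the XML file name ending in '_<digits>'. With a hypothetical file name, the parsing looks like this:

import re

xmlFile = '/var/log/panda/4132532996_3'   # assumed <PandaID>_<attemptNr> naming
tail = xmlFile.split('/')[-1].split('_')[-1]
attemptNr = int(tail) if re.search(r'^\d+$', tail) else None
assert attemptNr == 3
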
Example #7
 def __init__(self, taskBuffer, siteMapper, evpFileName, ignoreError):
     self.taskBuffer = taskBuffer
     self.siteMapper = siteMapper
     self.ignoreError = ignoreError
     self.evpFileName = evpFileName
     self.token = datetime.datetime.utcnow().isoformat(' ')
     # logger
     self.logger = LogWrapper(_logger, self.token)
     self.pd2p = DynDataDistributer.DynDataDistributer([],
                                                       self.taskBuffer,
                                                       self.siteMapper,
                                                       token=' ',
                                                       logger=self.logger)
     self.userDatasetName = ''
     self.creationTime = ''
     self.params = ''
     self.lockedBy = ''
     self.evpFile = None
     self.userTaskName = ''
     # message buffer
     self.msgBuffer = []
     self.lineLimit = 100
     # JEDI
     self.jediTaskID = None
     self.prodSourceLabel = None
     self.job_label = None
Example #8
File: Utils.py Project: eschanet/QMonit
def uploadLog(req, file):
    if not Protocol.isSecure(req):
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    tmpLog = LogWrapper(_logger, 'uploadLog <{0}>'.format(file.filename))
    tmpLog.debug("start {0}".format(req.subprocess_env['SSL_CLIENT_S_DN']))
    # size check
    sizeLimit = 100 * 1024 * 1024
    # get file size
    contentLength = 0
    try:
        contentLength = int(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            tmpLog.error("cannot get CL : %s" %
                         req.headers_in["content-length"])
        else:
            tmpLog.error("no CL")
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "failed to upload log due to size limit"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    jediLogDir = '/jedilog'
    retStr = ''
    try:
        fileBaseName = file.filename.split('/')[-1]
        fileFullPath = '{0}{1}/{2}'.format(panda_config.cache_dir, jediLogDir,
                                           fileBaseName)
        # delete old file
        if os.path.exists(fileFullPath):
            os.remove(fileFullPath)
        # write
        fo = open(fileFullPath, 'wb')
        fileContent = file.file.read()
        fo.write(fileContent)
        fo.close()
        tmpLog.debug("written to {0}".format(fileFullPath))
        retStr = 'http://{0}/cache{1}/{2}'.format(getServerHTTP(None),
                                                  jediLogDir, fileBaseName)
    except Exception:
        errtype, errvalue = sys.exc_info()[:2]
        errStr = "failed to write log with {0}:{1}".format(
            errtype.__name__, errvalue)
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    tmpLog.debug("end")
    return retStr
Example #9
def delete_checkpoint(req, task_id, sub_id):
    tmpLog = LogWrapper(_logger, 'delete_checkpoint <jediTaskID={0} ID={1}>'.format(task_id, sub_id))
    status = True
    if not Protocol.isSecure(req):
        msg = 'insecure request'
        tmpLog.error(msg)
        status = False
    else:
        tmpLog.debug("start %s" % req.subprocess_env['SSL_CLIENT_S_DN'])
        try:
            fileFullPath = os.path.join(panda_config.cache_dir, get_checkpoint_filename(task_id, sub_id))
            os.remove(fileFullPath)
            msg = 'done'
            tmpLog.debug(msg)
        except Exception as e:
            msg = "failed to delete file due to {0}".format(str(e))
            tmpLog.error(msg)
            status = False
    return json.dumps({'status': status, 'message': msg})
Example #10
def put_checkpoint(req, file):
    tmpLog = LogWrapper(_logger, 'put_checkpoint <jediTaskID_subID={0}>'.format(file.filename))
    status = False
    if not Protocol.isSecure(req):
        errStr = 'insecure request'
        tmpLog.error(errStr)
        return json.dumps({'status': status, 'message': errStr})
    tmpLog.debug("start %s" % req.subprocess_env['SSL_CLIENT_S_DN'])
    # extract taskID and subID
    try:
        task_id, sub_id = file.filename.split('/')[-1].split('_')
    except Exception:
        errStr = 'failed to extract ID'
        tmpLog.error(errStr)
        return json.dumps({'status': status, 'message': errStr})
    # size check
    sizeLimit = 500 * 1024 * 1024
    # get file size
    try:
        contentLength = int(req.headers_in["content-length"])
    except Exception as e:
        errStr = "cannot get int(content-length) due to {0}".format(str(e))
        tmpLog.error(errStr)
        return json.dumps({'status': status, 'message': errStr})
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "exceeded size limit %s>%s" % (contentLength, sizeLimit)
        tmpLog.error(errStr)
        return json.dumps({'status': status, 'message': errStr})
    try:
        fileFullPath = os.path.join(panda_config.cache_dir, get_checkpoint_filename(task_id, sub_id))
        # write
        with open(fileFullPath,'wb') as fo:
            fo.write(file.file.read())
    except Exception as e:
        errStr = "cannot write file due to {0}".format(str(e))
        tmpLog.error(errStr)
        return json.dumps({'status': status, 'message': errStr})
    status = True
    tmpMsg = "successfully placed at {0}".format(fileFullPath)
    tmpLog.debug(tmpMsg)
    return json.dumps({'status': status, 'message': tmpMsg})
Example #11
def put_workflow_request(req, data, check=False):
    if not Protocol.isSecure(req):
        return json.dumps((False, "ERROR : no HTTPS"))
    userName = req.subprocess_env['SSL_CLIENT_S_DN']
    creationTime = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    tmpLog = LogWrapper(_logger, 'put_workflow_request')
    tmpLog.debug("start user={} check={}".format(userName, check))
    if check == 'True' or check is True:
        check = True
    else:
        check = False
    # get total size
    try:
        # make filename
        evpFileName = '%s/workflow.%s' % (panda_config.cache_dir,str(uuid.uuid4()))
        tmpLog.debug("file={}".format(evpFileName))
        # write
        with open(evpFileName, 'w') as fo:
            data = {"userName": userName,
                    "creationTime": creationTime,
                    "data": json.loads(data),
                    }
            json.dump(data, fo)
        # check
        if check:
            tmpLog.debug('checking')
            from pandaserver.taskbuffer.workflow_processor import WorkflowProcessor
            processor = WorkflowProcessor(log_stream=_logger)
            ret = processor.process(evpFileName, True, True, True, True)
            if os.path.exists(evpFileName):
                try:
                    os.remove(evpFileName)
                except Exception:
                    pass
            tmpLog.debug('done')
            return json.dumps((True, ret))
    except Exception as e:
        errStr = "cannot put request due to {} ".format(str(e))
        tmpLog.error(errStr + traceback.format_exc())
        return json.dumps((False, errStr))
    tmpLog.debug('done')
    return json.dumps((True, 'request was accepted and will be processed in a few minutes'))
Example #12
 def checkProxy(self, user_dn, production=False, role=None, name=None):
     """Check the validity of a proxy."""
     log_stream = LogWrapper(_logger,
                             '< name="{}" role={} >'.format(name, role))
     log_stream.info('check proxy for {}'.format(user_dn))
     if role is not None:
         tmpExtension = self.getExtension(role)
         proxy_path = os.path.join(
             self.__target_path,
             str(hashlib.sha1(six.b(user_dn + tmpExtension)).hexdigest()))
     elif production:
         proxy_path = os.path.join(
             self.__target_path,
             str(hashlib.sha1(six.b(user_dn + '.prod')).hexdigest()))
     else:
         proxy_path = os.path.join(self.__target_path,
                                   hashlib.sha1(six.b(user_dn)).hexdigest())
     isOK = False
     if os.path.isfile(proxy_path):
         log_stream.info('proxy is there. Need to check validity')
         cmd = "voms-proxy-info -exists -hours 94 -file %s" % proxy_path
         stdout, stderr, status = execute(cmd, log_stream)
         if stdout:
             log_stream.info('stdout is %s ' % stdout)
         if stderr:
             log_stream.info('stderr is %s ' % stderr)
         if status == 1:
             log_stream.info(
                 'proxy expires in 94h or less. We need to renew proxy!')
             ret = self.store(user_dn,
                              self.__cred_name,
                              production,
                              role=role,
                              log_stream=log_stream)
             if ret == 0:
                 log_stream.info('proxy retrieval successful')
                 isOK = True
             elif ret == 2:
                 log_stream.info('proxy retrieval on hold')
             else:
                 log_stream.error('proxy retrieval failed')
         else:
             log_stream.info('proxy is valid for more than 3 days')
             isOK = True
     else:
         log_stream.info(
             'proxy is not in the cache repo. will try to get it from myproxy'
         )
         ret = self.store(user_dn,
                          self.__cred_name,
                          production,
                          role=role,
                          log_stream=log_stream)
         if ret == 0:
             log_stream.info('proxy stored successfully')
             isOK = True
         elif ret == 2:
             log_stream.info('proxy retrieval on hold')
         else:
             log_stream.error('proxy retrieval failed')
     if isOK:
         plain_path = os.path.join(
             self.__target_path,
             hashlib.sha1(six.b(user_dn + '.plain')).hexdigest())
         if os.path.isfile(plain_path):
             return self.checkValidity(plain_path, log_stream)
         else:
             log_stream.error('plain proxy not there at the moment!')
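
The cache lookup in checkProxy keys each stored proxy file by a SHA1 of the DN, optionally suffixed with a role extension, '.prod', or '.plain'. A minimal illustration with a hypothetical DN:

import hashlib
import six

user_dn = '/DC=ch/DC=cern/OU=Users/CN=jdoe'   # hypothetical DN
cache_key = hashlib.sha1(six.b(user_dn + '.prod')).hexdigest()
# the proxy file would then live at os.path.join(target_path, cache_key)
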
Example #13
def putFile(req, file):
    tmpLog = LogWrapper(_logger, 'putFile-{}'.format(datetime.datetime.utcnow().isoformat('/')))
    if not Protocol.isSecure(req):
        tmpLog.error('No SSL_CLIENT_S_DN')
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    # user name
    username = CoreUtils.clean_user_id(req.subprocess_env['SSL_CLIENT_S_DN'])
    tmpLog.debug("start %s %s" % (username, file.filename))
    # size check
    fullSizeLimit = 768*1024*1024
    if not file.filename.startswith('sources.'):
        noBuild = True
        sizeLimit = 100*1024*1024
    else:
        noBuild = False
        sizeLimit = fullSizeLimit
    # get file size
    contentLength = 0
    try:
        contentLength = int(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            tmpLog.error("cannot get CL : %s" % req.headers_in["content-length"])
        else:
            tmpLog.error("no CL")
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (contentLength,sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    try:
        fileName = file.filename.split('/')[-1]
        fileFullPath = '%s/%s' % (panda_config.cache_dir, fileName)

        # avoid overwriting
        if os.path.exists(fileFullPath):
            # touch
            os.utime(fileFullPath,None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            tmpLog.debug('cannot overwrite file %s' % fileName)
            tmpLog.debug("end")
            return errStr
        # write
        fo = open(fileFullPath,'wb')
        fileContent = file.file.read()
        if hasattr(panda_config, 'compress_file_names') and \
                [True for patt in panda_config.compress_file_names.split(',') if re.search(patt, fileName) is not None]:
            fileContent = gzip.compress(fileContent)
        fo.write(fileContent)
        fo.close()
    except Exception:
        errStr = "ERROR : Cannot write file"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    # checksum
    try:
        # decode Footer
        footer = fileContent[-8:]
        checkSum,isize = struct.unpack("II",footer)
        tmpLog.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # calculate on the fly
        """
        import zlib
        checkSum = zlib.adler32(fileContent) & 0xFFFFFFFF
        """
        # use None to avoid delay for now
        checkSum = None
        tmpLog.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    tmpLog.debug("written dn=%s file=%s size=%s crc=%s" % \
                  (username, fileFullPath, fileSize, checkSum))
    # put file info to DB
    if panda_config.record_sandbox_info:
        to_insert = True
        for patt in IGNORED_SUFFIX:
            if file.filename.endswith(patt):
                to_insert = False
                break
        if not to_insert:
            tmpLog.debug("skipped to insert to DB")
        else:
            statClient,outClient = Client.insertSandboxFileInfo(username,file.filename,
                                                                fileSize,checkSum)
            if statClient != 0 or outClient.startswith("ERROR"):
                tmpLog.error("failed to put sandbox to DB with %s %s" % (statClient,outClient))
                #_logger.debug("putFile : end")
                #return "ERROR : Cannot insert sandbox to DB"
            else:
                tmpLog.debug("inserted sandbox to DB with %s" % outClient)
    tmpLog.debug("end")
    return True
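
The checksum step in putFile exploits the gzip trailer: per RFC 1952, the last 8 bytes of a gzip stream are the CRC32 of the uncompressed data and its size mod 2**32, both little-endian 32-bit integers. A self-contained check:

import gzip
import struct
import zlib

payload = b'hello gzip footer'
blob = gzip.compress(payload)
crc, isize = struct.unpack('<II', blob[-8:])
assert crc == zlib.crc32(payload) & 0xFFFFFFFF
assert isize == len(payload) % 2**32
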
Example #14
def main(argv=tuple(), tbuf=None, **kwargs):

    try:
        long
    except NameError:
        long = int

    tmpLog = LogWrapper(_logger, None)

    tmpLog.debug("===================== start =====================")

    # current minute
    currentMinute = datetime.datetime.utcnow().minute

    # instantiate TB
    if tbuf is None:
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost,
                        panda_config.dbpasswd,
                        nDBConnection=1)
    else:
        taskBuffer = tbuf

    # instantiate sitemapper
    aSiteMapper = SiteMapper(taskBuffer)

    # delete
    tmpLog.debug("Del session")
    status, retSel = taskBuffer.querySQLS(
        "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {})
    if retSel is not None:
        try:
            maxID = retSel[0][0]
            tmpLog.debug("maxID : %s" % maxID)
            if maxID is not None:
                varMap = {}
                varMap[':maxID'] = maxID
                varMap[':jobStatus1'] = 'activated'
                varMap[':jobStatus2'] = 'waiting'
                varMap[':jobStatus3'] = 'failed'
                varMap[':jobStatus4'] = 'cancelled'
                status, retDel = taskBuffer.querySQLS(
                    "DELETE FROM ATLAS_PANDA.jobsDefined4 WHERE PandaID<:maxID AND jobStatus IN (:jobStatus1,:jobStatus2,:jobStatus3,:jobStatus4)",
                    varMap)
        except Exception:
            pass

    # count # of getJob/updateJob in dispatcher's log
    try:
        # don't update when logrotate is running
        timeNow = datetime.datetime.utcnow()
        logRotateTime = timeNow.replace(hour=3,
                                        minute=2,
                                        second=0,
                                        microsecond=0)
        if (timeNow > logRotateTime and (timeNow-logRotateTime) < datetime.timedelta(minutes=5)) or \
               (logRotateTime > timeNow and (logRotateTime-timeNow) < datetime.timedelta(minutes=5)):
            tmpLog.debug("skip pilotCounts session for logrotate")
        else:
            # log filename
            dispLogName = '%s/panda-PilotRequests.log' % panda_config.logdir
            # time limit
            timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
                hours=3)
            timeLimitS = datetime.datetime.utcnow() - datetime.timedelta(
                hours=1)
            # check if tgz is required
            com = 'head -1 %s' % dispLogName
            lostat, loout = commands_get_status_output(com)
            useLogTgz = True
            if lostat == 0:
                match = re.search(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}',
                                  loout)
                if match is not None:
                    startTime = datetime.datetime(*time.strptime(
                        match.group(0), '%Y-%m-%d %H:%M:%S')[:6])
                    # current log contains all info
                    if startTime < timeLimit:
                        useLogTgz = False
            # log files
            dispLogNameList = [dispLogName]
            if useLogTgz:
                today = datetime.date.today()
                dispLogNameList.append('{0}-{1}.gz'.format(
                    dispLogName, today.strftime('%Y%m%d')))
            # delete tmp
            commands_get_status_output('rm -f %s.tmp-*' % dispLogName)
            # tmp name
            tmpLogName = '%s.tmp-%s' % (dispLogName, datetime.datetime.utcnow(
            ).strftime('%Y-%m-%d-%H-%M-%S'))
            # loop over all files
            pilotCounts = {}
            pilotCountsS = {}
            for tmpDispLogName in dispLogNameList:
                # expand or copy
                if tmpDispLogName.endswith('.gz'):
                    com = 'gunzip -c %s > %s' % (tmpDispLogName, tmpLogName)
                else:
                    com = 'cp %s %s' % (tmpDispLogName, tmpLogName)
                lostat, loout = commands_get_status_output(com)
                if lostat != 0:
                    errMsg = 'failed to expand/copy %s with : %s' % (
                        tmpDispLogName, loout)
                    raise RuntimeError(errMsg)
                # search string
                sStr = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*'
                sStr += r'method=(.+),site=(.+),node=(.+),type=(.+)'
                # read
                logFH = open(tmpLogName)
                for line in logFH:
                    # check format
                    match = re.search(sStr, line)
                    if match is not None:
                        # check timerange
                        timeStamp = datetime.datetime(*time.strptime(
                            match.group(1), '%Y-%m-%d %H:%M:%S')[:6])
                        if timeStamp < timeLimit:
                            continue
                        tmpMethod = match.group(2)
                        tmpSite = match.group(3)
                        tmpNode = match.group(4)
                        tmpType = match.group(5)

                        # protection against corrupted entries from pilot,
                        # e.g. pilot reading site json from cvmfs while it was being updated
                        if tmpSite not in aSiteMapper.siteSpecList:
                            continue
                        # sum
                        pilotCounts.setdefault(tmpSite, {})
                        pilotCounts[tmpSite].setdefault(tmpMethod, {})
                        pilotCounts[tmpSite][tmpMethod].setdefault(tmpNode, 0)
                        pilotCounts[tmpSite][tmpMethod][tmpNode] += 1
                        # short
                        if timeStamp > timeLimitS:
                            if tmpSite not in pilotCountsS:
                                pilotCountsS[tmpSite] = dict()
                            if tmpMethod not in pilotCountsS[tmpSite]:
                                pilotCountsS[tmpSite][tmpMethod] = dict()
                            if tmpNode not in pilotCountsS[tmpSite][tmpMethod]:
                                pilotCountsS[tmpSite][tmpMethod][tmpNode] = 0
                            pilotCountsS[tmpSite][tmpMethod][tmpNode] += 1
                # close
                logFH.close()
            # delete tmp
            commands_get_status_output('rm %s' % tmpLogName)
            # update
            hostID = panda_config.pserverhost.split('.')[0]
            tmpLog.debug("pilotCounts session")
            retPC = taskBuffer.updateSiteData(hostID, pilotCounts, interval=3)
            tmpLog.debug(retPC)
            retPC = taskBuffer.updateSiteData(hostID, pilotCountsS, interval=1)
            tmpLog.debug(retPC)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("updateJob/getJob : %s %s" % (errType, errValue))

    # nRunning
    tmpLog.debug("nRunning session")
    try:
        if (currentMinute // panda_config.nrun_interval
            ) % panda_config.nrun_hosts == panda_config.nrun_snum:
            retNR = taskBuffer.insertnRunningInSiteData()
            tmpLog.debug(retNR)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("nRunning : %s %s" % (errType, errValue))

    # session for co-jumbo jobs
    tmpLog.debug("co-jumbo session")
    try:
        ret = taskBuffer.getCoJumboJobsToBeFinished(30, 0, 1000)
        if ret is None:
            tmpLog.debug("failed to get co-jumbo jobs to finish")
        else:
            coJumboA, coJumboD, coJumboW, coJumboTokill = ret
            tmpLog.debug("finish {0} co-jumbo jobs in Active".format(
                len(coJumboA)))
            if len(coJumboA) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboA,
                                               fromDefined=False,
                                               fromActive=True,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False)
            tmpLog.debug("finish {0} co-jumbo jobs in Defined".format(
                len(coJumboD)))
            if len(coJumboD) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboD,
                                               fromDefined=True,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], True)
            tmpLog.debug("finish {0} co-jumbo jobs in Waiting".format(
                len(coJumboW)))
            if len(coJumboW) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboW,
                                               fromDefined=False,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=True)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False, True)
            tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(
                len(coJumboTokill)))
            if len(coJumboTokill) > 0:
                jediJobs = list(coJumboTokill)
                nJob = 100
                iJob = 0
                while iJob < len(jediJobs):
                    tmpLog.debug(' killing %s' %
                                 str(jediJobs[iJob:iJob + nJob]))
                    Client.killJobs(jediJobs[iJob:iJob + nJob],
                                    51,
                                    keepUnmerged=True)
                    iJob += nJob
    except Exception:
        errStr = traceback.format_exc()
        tmpLog.error(errStr)

    tmpLog.debug("Fork session")

    # thread for fork
    class ForkThr(threading.Thread):
        def __init__(self, fileName):
            threading.Thread.__init__(self)
            self.fileName = fileName

        def run(self):
            if 'VIRTUAL_ENV' in os.environ:
                prefix = os.environ['VIRTUAL_ENV']
            else:
                prefix = ''
            setupStr = 'source {0}/etc/sysconfig/panda_server; '.format(prefix)
            runStr = '%s/python -Wignore ' % panda_config.native_python
            runStr += panda_config.pandaPython_dir + '/dataservice/forkSetupper.py -i '
            runStr += self.fileName
            if self.fileName.split('/')[-1].startswith('set.NULL.'):
                runStr += ' -t'
            comStr = setupStr + runStr
            tmpLog.debug(comStr)
            commands_get_status_output(comStr)

    # get set.* files
    filePatt = panda_config.logdir + '/' + 'set.*'
    fileList = glob.glob(filePatt)

    # the max number of threads
    maxThr = 10
    nThr = 0

    # loop over all files
    forkThrList = []
    timeNow = datetime.datetime.utcnow()
    for tmpName in fileList:
        if not os.path.exists(tmpName):
            continue
        try:
            # takes care of only recent files
            modTime = datetime.datetime(
                *(time.gmtime(os.path.getmtime(tmpName))[:6]))
            if (timeNow - modTime) > datetime.timedelta(minutes=1) and \
                    (timeNow - modTime) < datetime.timedelta(hours=1):
                cSt, cOut = commands_get_status_output(
                    'ps aux | grep fork | grep -v PYTH')
                # if no process is running for the file
                if cSt == 0 and tmpName not in cOut:
                    nThr += 1
                    thr = ForkThr(tmpName)
                    thr.start()
                    forkThrList.append(thr)
                    if nThr >= maxThr:
                        break
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            tmpLog.error("%s %s" % (errType, errValue))

    # join fork threads
    for thr in forkThrList:
        thr.join()

    # terminate TaskBuffer IF
    # taskBufferIF.terminate()

    tmpLog.debug("===================== end =====================")
Example #15
def main(argv=tuple(), tbuf=None, **kwargs):

    try:
        long
    except NameError:
        long = int

    prelock_pid = GenericThread().get_pid()
    tmpLog = LogWrapper(_logger, "<pid={}>".format(prelock_pid))

    tmpLog.debug("===================== start =====================")

    # return value, true to run main again in next daemon loop
    ret_val = True

    # grace period
    try:
        gracePeriod = int(argv[1])
    except Exception:
        gracePeriod = 1

    # lock interval in minutes
    lock_interval = 10

    # retry interval in minutes
    retry_interval = 3

    # instantiate TB
    if tbuf is None:
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost,
                        panda_config.dbpasswd,
                        nDBConnection=1)
    else:
        taskBuffer = tbuf

    # instantiate sitemapper
    aSiteMapper = SiteMapper(taskBuffer)

    # thread for adder
    class AdderThread(GenericThread):
        def __init__(self, taskBuffer, aSiteMapper, job_output_reports):
            GenericThread.__init__(self)
            self.taskBuffer = taskBuffer
            self.aSiteMapper = aSiteMapper
            self.job_output_reports = job_output_reports

        # main loop
        def run(self):
            # initialize
            taskBuffer = self.taskBuffer
            aSiteMapper = self.aSiteMapper
            # get file list
            timeNow = datetime.datetime.utcnow()
            timeInt = datetime.datetime.utcnow()
            # unique pid
            GenericThread.__init__(self)
            uniq_pid = self.get_pid()
            # log pid
            tmpLog.debug("pid={0} : run".format(uniq_pid))
            # stats
            n_processed = 0
            # loop
            while True:
                # get report
                one_jor = self.job_output_reports.pop()
                if not one_jor:
                    break
                # lock
                panda_id, job_status, attempt_nr, time_stamp = one_jor
                got_lock = taskBuffer.lockJobOutputReport(
                    panda_id=panda_id,
                    attempt_nr=attempt_nr,
                    pid=uniq_pid,
                    time_limit=lock_interval)
                if not got_lock:
                    continue
                # add
                try:
                    modTime = time_stamp
                    if (timeNow - modTime) > datetime.timedelta(hours=24):
                        # last add
                        tmpLog.debug(
                            "pid={0} : last add job={1}.{2} st={3}".format(
                                uniq_pid, panda_id, attempt_nr, job_status))
                        ignoreTmpError = False
                    else:
                        # usual add
                        tmpLog.debug("pid={0} : add job={1}.{2} st={3}".format(
                            uniq_pid, panda_id, attempt_nr, job_status))
                        ignoreTmpError = True
                    # get adder
                    adder_gen = AdderGen(taskBuffer,
                                         panda_id,
                                         job_status,
                                         attempt_nr,
                                         ignoreTmpError=ignoreTmpError,
                                         siteMapper=aSiteMapper,
                                         pid=uniq_pid,
                                         prelock_pid=uniq_pid,
                                         lock_offset=lock_interval -
                                         retry_interval)
                    n_processed += 1
                    # execute
                    adder_gen.run()
                    del adder_gen
                except Exception as e:
                    tmpLog.error("pid={} : failed to run with {} {}".format(
                        uniq_pid, str(e), traceback.format_exc()))
            # stats
            tmpLog.debug("pid={} : processed {}".format(uniq_pid, n_processed))

        # launcher, run with multiprocessing
        def proc_launch(self):
            # run
            self.process = multiprocessing.Process(target=self.run)
            self.process.start()

        # join of multiprocessing
        def proc_join(self):
            self.process.join()

    # TaskBuffer with more connections behind TaskBufferInterface
    tmpLog.debug("setup taskBufferIF")
    n_connections = 4
    _tbuf = TaskBuffer()
    _tbuf.init(panda_config.dbhost,
               panda_config.dbpasswd,
               nDBConnection=n_connections)
    taskBufferIF = TaskBufferInterface()
    taskBufferIF.launch(_tbuf)

    # add files
    tmpLog.debug("run Adder")

    interval = 10
    nLoop = 10
    for iLoop in range(nLoop):
        tmpLog.debug('start iLoop={}/{}'.format(iLoop, nLoop))
        start_time = datetime.datetime.utcnow()
        adderThrList = []
        nThr = 10

        n_jors_per_batch = 1000

        jor_lists = WeightedLists(multiprocessing.Lock())

        # get some job output reports
        jor_list_others = taskBuffer.listJobOutputReport(
            only_unlocked=True,
            time_limit=lock_interval,
            limit=n_jors_per_batch * nThr,
            grace_period=gracePeriod,
            anti_labels=['user'])
        jor_lists.add(3, jor_list_others)
        jor_list_user = taskBuffer.listJobOutputReport(
            only_unlocked=True,
            time_limit=lock_interval,
            limit=n_jors_per_batch * nThr,
            grace_period=gracePeriod,
            labels=['user'])
        jor_lists.add(7, jor_list_user)

        # adder consumer processes
        _n_thr_with_tbuf = 0
        tbuf_list = []
        tmpLog.debug("got {} job reports".format(len(jor_lists)))
        for i in range(nThr):
            if i < _n_thr_with_tbuf:
                tbuf = TaskBuffer()
                tbuf_list.append(tbuf)
                tbuf.init(panda_config.dbhost,
                          panda_config.dbpasswd,
                          nDBConnection=1)
                thr = AdderThread(tbuf, aSiteMapper, jor_lists)
            else:
                thr = AdderThread(taskBufferIF.getInterface(), aSiteMapper,
                                  jor_lists)
            adderThrList.append(thr)
        # start all threads
        for thr in adderThrList:
            # thr.start()
            thr.proc_launch()
            time.sleep(0.25)

        # join all threads
        for thr in adderThrList:
            # thr.join()
            thr.proc_join()
        [tbuf.cleanup() for tbuf in tbuf_list]
        end_time = datetime.datetime.utcnow()
        sleep_time = interval - (end_time - start_time).seconds
        if sleep_time > 0 and iLoop + 1 < nLoop:
            sleep_time = random.randint(1, sleep_time)
            tmpLog.debug("sleep {} sec".format(sleep_time))
            time.sleep(sleep_time)

    # stop TaskBuffer IF
    taskBufferIF.stop()

    tmpLog.debug("===================== end =====================")

    # return
    return ret_val
Example #16
def core_exec(sandbox_url, log_token, dump_workflow, ops_file, user_name, test_mode):
    tmpLog = LogWrapper(_logger, log_token)
    is_OK = True
    is_fatal = False
    request_id = None
    if dump_workflow == 'True':
        dump_workflow = True
    else:
        dump_workflow = False
    if test_mode == 'True':
        test_mode = True
    else:
        test_mode = False
    try:
        with open(ops_file) as f:
            ops = json.load(f)
        try:
            os.remove(ops_file)
        except Exception:
            pass
        # go to temp dir
        cur_dir = os.getcwd()
        with tempfile.TemporaryDirectory() as tmp_dirname:
            os.chdir(tmp_dirname)
            # download sandbox
            tmpLog.info('downloading sandbox from {}'.format(sandbox_url))
            with requests.get(sandbox_url, allow_redirects=True, verify=False, stream=True) as r:
                if r.status_code == 400:
                    tmpLog.error("not found")
                    is_fatal = True
                    is_OK = False
                elif r.status_code != 200:
                    tmpLog.error("bad HTTP response {}".format(r.status_code))
                    is_OK = False
                # extract sandbox
                if is_OK:
                    with open(ops['data']['sandbox'], 'wb') as fs:
                        for chunk in r.raw.stream(1024, decode_content=False):
                            if chunk:
                                fs.write(chunk)
                    tmp_stat, tmp_out = commands_get_status_output(
                        'tar xvfz {}'.format(ops['data']['sandbox']))
                    if tmp_stat != 0:
                        tmpLog.error(tmp_out)
                        dump_str = 'failed to extract {}'.format(ops['data']['sandbox'])
                        tmpLog.error(dump_str)
                        is_fatal = True
                        is_OK = False
                # parse workflow files
                if is_OK:
                    tmpLog.info('parse workflow')
                    if ops['data']['language'] == 'cwl':
                        nodes, root_in = pcwl_utils.parse_workflow_file(ops['data']['workflowSpecFile'],
                                                                        tmpLog)
                        with open(ops['data']['workflowInputFile']) as workflow_input:
                            data = yaml.safe_load(workflow_input)
                        s_id, t_nodes, nodes = pcwl_utils.resolve_nodes(nodes, root_in, data, 0, set(),
                                                                        ops['data']['outDS'], tmpLog)
                        workflow_utils.set_workflow_outputs(nodes)
                        id_node_map = workflow_utils.get_node_id_map(nodes)
                        [node.resolve_params(ops['data']['taskParams'], id_node_map) for node in nodes]
                        dump_str = "the description was internally converted as follows\n" \
                                   + workflow_utils.dump_nodes(nodes)
                        tmpLog.info(dump_str)
                        for node in nodes:
                            s_check, o_check = node.verify()
                            tmp_str = 'Verification failure in ID:{} {}'.format(node.id, o_check)
                            if not s_check:
                                tmpLog.error(tmp_str)
                                dump_str += tmp_str
                                dump_str += '\n'
                                is_fatal = True
                                is_OK = False
                    else:
                        dump_str = "{} is not supported to describe the workflow"
                        tmpLog.error(dump_str)
                        is_fatal = True
                        is_OK = False
                    # convert to workflow
                    if is_OK:
                        workflow_to_submit, dump_str_list = workflow_utils.convert_nodes_to_workflow(nodes)
                        try:
                            if workflow_to_submit:
                                if not test_mode:
                                    tmpLog.info('submit workflow')
                                    wm = ClientManager(host=get_rest_host())
                                    request_id = wm.submit(workflow_to_submit, username=user_name)
                            else:
                                dump_str = 'workflow is empty'
                                tmpLog.error(dump_str)
                                is_fatal = True
                                is_OK = False
                        except Exception as e:
                            dump_str = 'failed to submit the workflow with {}'.format(str(e))
                            tmpLog.error('{} {}'.format(dump_str, traceback.format_exc()))
                        if dump_workflow:
                            tmpLog.debug('\n' + ''.join(dump_str_list))
        os.chdir(cur_dir)
    except Exception as e:
        is_OK = False
        is_fatal = True
        tmpLog.error("failed to run with {} {}".format(str(e), traceback.format_exc()))

    with tempfile.NamedTemporaryFile(delete=False, mode='w') as tmp_json:
        json.dump([is_OK, is_fatal, request_id, tmpLog.dumpToString()], tmp_json)
        print(tmp_json.name)
    sys.exit(0)
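
core_exec hands its result back to the parent by dumping a JSON list into a temp file and printing the file name as its last stdout line; Example #17 reads it back on the other side. The round trip in isolation:

import json
import tempfile

with tempfile.NamedTemporaryFile(delete=False, mode='w') as tmp_json:
    json.dump([True, False, 123, 'log text'], tmp_json)
    name = tmp_json.name
with open(name) as f:
    is_OK, is_fatal, request_id, dump_str = json.load(f)
assert is_OK and request_id == 123
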
Example #17
 def process(self, file_name, to_delete=False, test_mode=False, get_log=False, dump_workflow=False):
     try:
         is_fatal = False
         is_OK = True
         request_id = None
         dump_str = None
         with open(file_name) as f:
             ops = json.load(f)
             user_name = clean_user_id(ops["userName"])
             base_platform = ops['data'].get('base_platform')
             for task_type in ops['data']['taskParams']:
                 ops['data']['taskParams'][task_type]['userName'] = user_name
                 if base_platform:
                     ops['data']['taskParams'][task_type]['basePlatform'] = base_platform
             log_token = '< id="{}" test={} outDS={} >'.format(user_name, test_mode, ops['data']['outDS'])
             tmpLog = LogWrapper(self.log, log_token)
             tmpLog.info('start {}'.format(file_name))
             sandbox_url = os.path.join(ops['data']['sourceURL'], 'cache', ops['data']['sandbox'])
             # IO through json files
             ops_file = tempfile.NamedTemporaryFile(delete=False, mode='w')
             json.dump(ops, ops_file)
             ops_file.close()
             # execute main in another process to avoid chdir mess
             tmp_stat, tmp_out = commands_get_status_output("python {} {} '{}' {} {} '{}' {}".format(
                 __file__, sandbox_url, log_token, dump_workflow, ops_file.name,
                 user_name, test_mode))
             if tmp_stat:
                 is_OK = False
                 tmpLog.error('main execution failed with {}:{}'.format(tmp_stat, tmp_out))
             else:
                 tmp_out_name = tmp_out.split('\n')[-1]
                 with open(tmp_out_name) as tmp_out_file:
                     is_OK, is_fatal, request_id, dump_str = json.load(tmp_out_file)
                 try:
                     os.remove(tmp_out_name)
                 except Exception:
                     pass
             if not get_log:
                 if is_OK:
                     tmpLog.info('is_OK={} request_id={}'.format(is_OK, request_id))
                 else:
                     tmpLog.info('is_OK={} is_fatal={} request_id={}'.format(is_OK, is_fatal, request_id))
             if to_delete or (not test_mode and (is_OK or is_fatal)):
                 dump_str = tmpLog.dumpToString() + dump_str
                 tmpLog.debug('delete {}'.format(file_name))
                 try:
                     os.remove(file_name)
                 except Exception:
                     pass
                 # send notification
                 if not test_mode and self.taskBuffer is not None:
                     toAdder = self.taskBuffer.getEmailAddr(user_name)
                     if toAdder is None or toAdder.startswith('notsend'):
                         tmpLog.debug('skip to send notification since suppressed')
                     else:
                         # message
                         if is_OK:
                             mailSubject = "PANDA Notification for Workflow {}".format(ops['data']['outDS'])
                             mailBody = "Hello,\n\nWorkflow:{} has been accepted with RequestID:{}\n\n".\
                                 format(ops['data']['outDS'], request_id)
                         else:
                             mailSubject = "PANDA WARNING for Workflow={}".format(ops['data']['outDS'])
                             mailBody = "Hello,\n\nWorkflow {} was not accepted\n\n".\
                                 format(ops['data']['outDS'], request_id)
                             mailBody += "Reason : %s\n" % dump_str
                         # send
                         tmpSM = MailUtils().send(toAdder, mailSubject, mailBody)
                         tmpLog.debug('sent message with {}'.format(tmpSM))
     except Exception as e:
         is_OK = False
         tmpLog.error("failed to run with {} {}".format(str(e), traceback.format_exc()))
     if get_log:
         ret_val = {'status': is_OK}
         if is_OK:
             ret_val['log'] = dump_str
         else:
             if dump_str is None:
                 ret_val['log'] = tmpLog.dumpToString()
             else:
                 ret_val['log'] = dump_str
         return ret_val
Example #18
 def application(environ, start_response):
     # get method name
     methodName = ''
     if 'SCRIPT_NAME' in environ:
         methodName = environ['SCRIPT_NAME'].split('/')[-1]
     tmpLog = LogWrapper(_logger, "PID={0} {1}".format(os.getpid(), methodName))
     tmpLog.debug("start")
     regStart = datetime.datetime.utcnow()
     retType = None
     # check method name
     if methodName not in allowedMethods:
         tmpLog.error("is forbidden")
         exeRes = "False : %s is forbidden" % methodName
     else:
         # get method object
         tmpMethod = None
         try:
             tmpMethod = globals()[methodName]
         except Exception:
             pass
         # object not found
         if tmpMethod is None:
             tmpLog.error("is undefined")
             exeRes = "False"
         else:
             try:
                 # get params 
                 tmpPars = cgi.FieldStorage(environ['wsgi.input'], environ=environ,
                                            keep_blank_values=1)
                 # convert to map
                 params = {}
                 for tmpKey in list(tmpPars):
                     if tmpPars[tmpKey].file is not None and tmpPars[tmpKey].filename is not None:
                         # file
                         params[tmpKey] = tmpPars[tmpKey]
                     else:
                         # string
                         params[tmpKey] = tmpPars.getfirst(tmpKey)
                 if panda_config.entryVerbose:
                     tmpLog.debug("with %s" % str(list(params)))
                 # dummy request object
                 dummyReq = DummyReq(environ, tmpLog)
                 param_list = [dummyReq]
                 # exec
                 exeRes = tmpMethod(*param_list, **params)
                 # extract return type
                 if isinstance(exeRes, dict):
                     retType = exeRes['type']
                     exeRes  = exeRes['content']
                 # convert bool to string
                 if exeRes in [True,False]:
                     exeRes = str(exeRes)
             except Exception as e:
                 tmpLog.error("execution failure : {0}".format(str(e)))
                 errStr = ""
                 for tmpKey in environ:
                     tmpVal = environ[tmpKey]
                     errStr += "%s : %s\n" % (tmpKey,str(tmpVal))
                 tmpLog.error(errStr)
                 # return internal server error
                 start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
                 return [str(e).encode()]
     if panda_config.entryVerbose:
         tmpLog.debug("done")
     regTime = datetime.datetime.utcnow() - regStart
     tmpLog.info("exec_time=%s.%03d sec, return len=%s B" % (regTime.seconds,
                                                             regTime.microseconds/1000,
                                                             len(str(exeRes))))
     # return
     if exeRes == pandaserver.taskbuffer.ErrorCode.EC_NotFound:
         start_response('404 Not Found', [('Content-Type', 'text/plain')])
         return [b'not found']
     elif isinstance(exeRes, pandaserver.taskbuffer.ErrorCode.EC_Redirect):
         start_response('302 Redirect', [('Location', exeRes.url)])
         return [b'redirect']
     else:                
         if retType == 'json':
             start_response('200 OK', [('Content-Type', 'application/json')])
         else:
             start_response('200 OK', [('Content-Type', 'text/plain')])
         if isinstance(exeRes, str):
             exeRes = exeRes.encode()
         return [exeRes]
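
One detail the dispatch code above must get right: under Python 3 a WSGI body is an iterable of bytes, which is why exeRes is encoded before being returned. A tiny stand-in app (not the PanDA entry point) makes the point:

def tiny_app(environ, start_response):
    exeRes = 'True'
    start_response('200 OK', [('Content-Type', 'text/plain')])
    if isinstance(exeRes, str):
        exeRes = exeRes.encode()   # a bare str would break under a strict WSGI server
    return [exeRes]

body = b''.join(tiny_app({}, lambda status, headers: None))
assert body == b'True'
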
Example #19
 def __init__(self, job, datasets, log):
     self.jobSpec = job
     self.datasets = datasets
     self.tmpLog = LogWrapper(
         log, "{0} CloserAtlasPlugin".format(self.jobSpec.PandaID))
Example #20
 def run(self):
     try:
         # make a message instance
         tmpLog = LogWrapper(_logger, None)
         # run main procedure in the same process
         if not self.forkRun:
             tmpLog.debug('main start')
             tmpLog.debug('firstSubmission={0}'.format(
                 self.firstSubmission))
             # group jobs per VO
             voJobsMap = {}
             ddmFreeJobs = []
             tmpLog.debug('{0} jobs in total'.format(len(self.jobs)))
             for tmpJob in self.jobs:
                 # set VO=local for DDM free
                 if tmpJob.destinationSE == 'local':
                     tmpVO = 'local'
                 else:
                     tmpVO = tmpJob.VO
                 # make map
                 voJobsMap.setdefault(tmpVO, [])
                 voJobsMap[tmpVO].append(tmpJob)
             # loop over all VOs
             for tmpVO in voJobsMap:
                 tmpJobList = voJobsMap[tmpVO]
                 tmpLog.debug('vo={0} has {1} jobs'.format(
                     tmpVO, len(tmpJobList)))
                 # get plugin
                 setupperPluginClass = panda_config.getPlugin(
                     'setupper_plugins', tmpVO)
                 if setupperPluginClass is None:
                     # use ATLAS plug-in by default
                     from pandaserver.dataservice.SetupperAtlasPlugin import SetupperAtlasPlugin
                     setupperPluginClass = SetupperAtlasPlugin
                 tmpLog.debug('plugin name -> {0}'.format(
                     setupperPluginClass.__name__))
                 try:
                     # make plugin
                     setupperPlugin = setupperPluginClass(
                         self.taskBuffer,
                         self.jobs,
                         tmpLog,
                         resubmit=self.resubmit,
                         pandaDDM=self.pandaDDM,
                         ddmAttempt=self.ddmAttempt,
                         onlyTA=self.onlyTA,
                         firstSubmission=self.firstSubmission)
                     # run plugin
                     tmpLog.debug('run plugin')
                     setupperPlugin.run()
                     # go forward if not TA
                     if not self.onlyTA:
                         # update jobs
                         tmpLog.debug('update jobs')
                         self.updateJobs(
                             setupperPlugin.jobs + setupperPlugin.jumboJobs,
                             tmpLog)
                         # execute post process
                         tmpLog.debug('post execute plugin')
                         setupperPlugin.postRun()
                     tmpLog.debug('done plugin')
                 except Exception:
                     errtype, errvalue = sys.exc_info()[:2]
                     tmpLog.error('plugin failed with {0}:{1}'.format(
                         errtype, errvalue))
             tmpLog.debug('main end')
         else:
             tmpLog.debug('fork start')
             # write jobs to file
             import os
             import uuid
             try:
                 import cPickle as pickle
             except ImportError:
                 import pickle
             outFileName = '%s/set.%s_%s' % (panda_config.logdir,
                                             self.jobs[0].PandaID,
                                             str(uuid.uuid4()))
             with open(outFileName, 'wb') as outFile:
                 pickle.dump(self.jobs, outFile, protocol=0)
             # run main procedure in another process because python doesn't release memory
             com = 'cd %s > /dev/null 2>&1; export HOME=%s; ' % (
                 panda_config.home_dir_cwd, panda_config.home_dir_cwd)
             com += 'env PYTHONPATH=%s:%s %s/python -Wignore %s/dataservice/forkSetupper.py -i %s' % \
                    (panda_config.pandaCommon_dir, panda_config.pandaPython_dir,
                     panda_config.native_python, panda_config.pandaPython_dir,
                     outFileName)
             if self.onlyTA:
                 com += " -t"
             if not self.firstSubmission:
                 com += " -f"
             tmpLog.debug(com)
             # execute
             status, output = self.taskBuffer.processLimiter.getstatusoutput(
                 com)
             tmpLog.debug("return from main process: %s %s" %
                          (status, output))
             tmpLog.debug('fork end')
     except Exception as e:
         tmpLog.error('master failed with {0} {1}'.format(
             str(e), traceback.format_exc()))
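
The forkRun branch above sidesteps CPython's reluctance to return freed memory to the OS: the jobs are pickled to a file and the heavy procedure runs in a short-lived child process, so everything is released when the child exits. A standalone sketch of the same pattern (run_in_child and the worker-script handling are illustrative, not part of PanDA):

import os
import sys
import pickle
import subprocess
import tempfile
import uuid

def run_in_child(jobs, worker_script):
    # serialize the work items where the child process can read them
    out_name = os.path.join(tempfile.gettempdir(),
                            'set.{0}'.format(uuid.uuid4()))
    with open(out_name, 'wb') as out_file:
        pickle.dump(jobs, out_file, protocol=0)
    # the child unpickles the file, does the work, and exits, releasing
    # all of its memory regardless of what the interpreter holds on to
    proc = subprocess.run([sys.executable, worker_script, '-i', out_name],
                          capture_output=True, text=True)
    return proc.returncode, proc.stdout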
Example #21
import re
import sys
import datetime
import traceback
from pandaserver.taskbuffer.TaskBuffer import taskBuffer
from pandacommon.pandalogger.PandaLogger import PandaLogger
from pandacommon.pandalogger.LogWrapper import LogWrapper
from pandaserver.brokerage.SiteMapper import SiteMapper

# password
from pandaserver.config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('prioryMassage')
tmpLog = LogWrapper(_logger)

tmpLog.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# get usage breakdown
usageBreakDownPerUser = {}
usageBreakDownPerSite = {}
workingGroupList = []
for table in ['ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsArchived4']:
    varMap = {}
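
The snippet is cut off inside the table loop, but the breakdown dicts it initializes are typically filled with a nested setdefault pattern; a hedged sketch of that accumulation (hypothetical rows, not the original continuation):

# hypothetical job rows: (user, workingGroup, site, nJobs)
rows = [('alice', None, 'SITE_A', 3), ('bob', 'wg1', 'SITE_A', 5)]
for user, workingGroup, site, nJobs in rows:
    usageBreakDownPerUser.setdefault(user, {})
    usageBreakDownPerUser[user].setdefault(workingGroup, {})
    usageBreakDownPerUser[user][workingGroup].setdefault(site, 0)
    usageBreakDownPerUser[user][workingGroup][site] += nJobs
    usageBreakDownPerSite.setdefault(site, {})
    usageBreakDownPerSite[site].setdefault(user, 0)
    usageBreakDownPerSite[site][user] += nJobs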
Example #22
 def getGUIDsFromEventIndex(self, runEventList, streamName, amiTags,
                            dataType):
     comment = ' /* DBProxy.getGUIDsFromEventIndex */'
     methodName = comment.split(' ')[-2].split('.')[-1]
     tmpLog = LogWrapper(
         _logger,
         methodName + " <streamName={0} amiTags={1} dataType={2}>".format(
             streamName, amiTags, dataType))
     try:
         # change to list
         if amiTags not in [None, '']:
             amiTags = amiTags.replace('*', '.*').split(',')
         tmpLog.debug("start for {0} events".format(len(runEventList)))
         # check data type
         if dataType not in ['RAW', 'ESD', 'AOD']:
             return False, 'dataType={0} is unsupported'.format(dataType)
         # sql to insert runs and events
         sqlRE = "INSERT INTO {0}.TMP_RUN_EVENT_PAIRS (runNumber,eventNumber) ".format(
             panda_config.schemaEI)
         sqlRE += "VALUES (:runNumber,:eventNumber) "
         varMaps = []
         for runNumber, eventNumber in runEventList:
             varMap = {}
             varMap[':runNumber'] = runNumber
             varMap[':eventNumber'] = eventNumber
             varMaps.append(varMap)
         # begin transaction
         self.conn.begin()
         self.cur.arraysize = 100000
         # insert runs and events
         self.cur.executemany(sqlRE + comment, varMaps)
         # read GUIDs
         varMap = {}
         if amiTags in [None, '']:
             sqlRG = "SELECT runNumber,eventNumber,guid_{0} ".format(
                 dataType)
             sqlRG += "FROM {0}.V_PANDA_EVPICK_NOAMITAG_MANY ".format(
                 panda_config.schemaEI)
         else:
             sqlRG = "SELECT runNumber,eventNumber,guid_{0},amiTag ".format(
                 dataType)
             sqlRG += "FROM {0}.V_PANDA_EVPICK_AMITAG_MANY ".format(
                 panda_config.schemaEI)
         if streamName not in [None, '']:
             sqlRG += "WHERE streamName=:streamName "
             varMap[':streamName'] = streamName
         self.cur.execute(sqlRG + comment, varMap)
         resRG = self.cur.fetchall()
         # commit
         if not self._commit():
             raise RuntimeError('Commit error')
         retValue = {}
         keyAmiIdxMap = {}
         for tmpItem in resRG:
             if amiTags in [None, '']:
                 runNumber, eventNumber, guid = tmpItem
                 # dummy
                 idxTag = 0
             else:
                 runNumber, eventNumber, guid, amiTag = tmpItem
                 # get index number for the AMI tag in the list
                 idxTag = self.getIndexAmiTag(amiTags, amiTag)
                 # didn't match
                 if idxTag is None:
                     continue
             tmpKey = (runNumber, eventNumber)
             # use the AMI tag earliest in the preference order
             if tmpKey in keyAmiIdxMap and keyAmiIdxMap[tmpKey] < idxTag:
                 continue
             keyAmiIdxMap[tmpKey] = idxTag
             retValue[tmpKey] = [guid]
         tmpLog.debug("found {0} events".format(len(retValue)))
         return True, retValue
     except Exception:
         # roll back
         self._rollback()
         # error
         self.dumpErrorMessage(_logger, methodName)
         return False, None
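
getIndexAmiTag is used above but not shown. Since amiTags was prepared with replace('*', '.*').split(','), each entry is already a regular expression, so a plausible minimal implementation (an assumption, not the actual DBProxy method) returns the position of the first pattern that matches, letting earlier tags win:

import re

def getIndexAmiTag(amiTagPatterns, amiTag):
    # hypothetical helper: patterns are regex-ready ('*' already mapped to '.*')
    for idx, pattern in enumerate(amiTagPatterns):
        if re.search('^' + pattern + '$', amiTag) is not None:
            return idx
    # no pattern matched
    return None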
Example #23
import sys
import datetime

from pandacommon.pandalogger.PandaLogger import PandaLogger
from pandacommon.pandalogger.LogWrapper import LogWrapper
from pandaserver.srvcore.CoreUtils import commands_get_status_output

from pandaserver.taskbuffer.TaskBufferInterface import TaskBufferInterface

try:
    long
except NameError:
    long = int

# password
from pandaserver.config import panda_config

# logger
_logger = PandaLogger().getLogger('add')

tmpLog = LogWrapper(_logger, None)

tmpLog.debug("===================== start =====================")

# overall timeout value
overallTimeout = 20

# grace period
try:
    gracePeriod = int(sys.argv[1])
except Exception:
    gracePeriod = 3

# current minute
currentMinute = datetime.datetime.utcnow().minute
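
The try/except around long above is a Python 2/3 compatibility shim: Python 3 merged long into int, so binding long = int on NameError keeps old call sites working. A quick self-contained check:

try:
    long
except NameError:
    # Python 3: long was merged into int
    long = int

assert long('42') == 42
assert isinstance(long(10), int)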
Example #24
 def application(environ, start_response):
     # get method name
     methodName = ''
     if 'SCRIPT_NAME' in environ:
         methodName = environ['SCRIPT_NAME'].split('/')[-1]
     tmpLog = LogWrapper(_logger, "PID={0} {1}".format(os.getpid(), methodName), seeMem=True)
     cont_length = int(environ.get('CONTENT_LENGTH', 0))
     json_body = environ.get('CONTENT_TYPE', None) == 'application/json'
     tmpLog.debug("start content-length={} json={}".format(cont_length, json_body))
     regStart = datetime.datetime.utcnow()
     retType = None
     # check method name
     if methodName not in allowedMethods:
         tmpLog.error("is forbidden")
         exeRes = "False : %s is forbidden" % methodName
     else:
         # get method object
         tmpMethod = None
         try:
             tmpMethod = globals()[methodName]
         except Exception:
             pass
         # object not found
         if tmpMethod is None:
             tmpLog.error("is undefined")
             exeRes = "False"
         else:
             body = b''
             try:
                 # dummy request object
                 dummyReq = DummyReq(environ, tmpLog)
                 if not dummyReq.authenticated:
                     start_response('403 Forbidden', [('Content-Type', 'text/plain')])
                     return ["ERROR : Token authentication failed on the server side. {}".format(
                         dummyReq.message).encode()]
                 username = dummyReq.subprocess_env.get('SSL_CLIENT_S_DN', None)
                 if username:
                     username = CoreUtils.clean_user_id(username)
                     if username in ban_user_list:
                         errMsg = '{} is banned'.format(username)
                         tmpLog.warning(errMsg)
                         start_response('403 Forbidden', [('Content-Type', 'text/plain')])
                         return ["ERROR : {}".format(errMsg).encode()]
                 # read contents
                 while cont_length > 0:
                     chunk = environ['wsgi.input'].read(min(cont_length, 1024*1024))
                     if not chunk:
                         break
                     cont_length -= len(chunk)
                     body += chunk
                 if cont_length > 0:
                     raise OSError('partial read from client. {} bytes remaining'.format(cont_length))
                 if not json_body:
                     # query string
                     environ['wsgi.input'] = io.BytesIO(body)
                     # get params
                     tmpPars = cgi.FieldStorage(environ['wsgi.input'], environ=environ,
                                                keep_blank_values=1)
                     # convert to map
                     params = {}
                     for tmpKey in list(tmpPars):
                         if tmpPars[tmpKey].file is not None and tmpPars[tmpKey].filename is not None:
                             # file
                             params[tmpKey] = tmpPars[tmpKey]
                         else:
                             # string
                             params[tmpKey] = tmpPars.getfirst(tmpKey)
                 else:
                     # json
                     body = gzip.decompress(body)
                     params = json.loads(body)
                 if panda_config.entryVerbose:
                     tmpLog.debug("with %s" % str(list(params)))
                 param_list = [dummyReq]
                 # exec
                 exeRes = tmpMethod(*param_list, **params)
                 # extract return type
                 if isinstance(exeRes, dict):
                     retType = exeRes['type']
                     exeRes  = exeRes['content']
                 # convert bool to string
                 if exeRes in [True, False]:
                     exeRes = str(exeRes)
             except Exception as e:
                 tmpLog.error("execution failure : {0}\n {1}".format(str(e), traceback.format_exc()))
                 if hasattr(panda_config, 'dumpBadRequest') and panda_config.dumpBadRequest:
                     try:
                         with tempfile.NamedTemporaryFile(delete=False, prefix='req_dump_') as f:
                             environ['WSGI_INPUT_DUMP'] = f.name
                             f.write(body)
                             os.chmod(f.name, 0o775)
                     except Exception:
                         tmpLog.error(traceback.format_exc())
                         pass
                 errStr = ""
                 for tmpKey in environ:
                     tmpVal = environ[tmpKey]
                     errStr += "%s : %s\n" % (tmpKey,str(tmpVal))
                 tmpLog.error(errStr)
                 # return internal server error
                 start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
                 # force kill to release memory
                 if isinstance(e, OSError):
                     tmpLog.warning('force restart due to OSError')
                     os.kill(os.getpid(), signal.SIGINT)
                 return [str(e).encode()]
     if panda_config.entryVerbose:
         tmpLog.debug("done")
     regTime = datetime.datetime.utcnow() - regStart
     tmpLog.info("exec_time=%s.%03d sec, return len=%s B" % (regTime.seconds,
                                                             regTime.microseconds/1000,
                                                             len(str(exeRes))))
     # return
     if exeRes == pandaserver.taskbuffer.ErrorCode.EC_NotFound:
         start_response('404 Not Found', [('Content-Type', 'text/plain')])
         return ['not found'.encode()]
     elif isinstance(exeRes, pandaserver.taskbuffer.ErrorCode.EC_Redirect):
         start_response('302 Redirect', [('Location', exeRes.url)])
         return ['redirect'.encode()]
     else:
         if retType == 'json':
             start_response('200 OK', [('Content-Type', 'application/json')])
         else:
             start_response('200 OK', [('Content-Type', 'text/plain')])
         if isinstance(exeRes, str):
             exeRes = exeRes.encode()
         return [exeRes]
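
A WSGI callable like application can be exercised without a web server; the helper below builds a minimal environ and collects the response (a sketch for illustration; call_wsgi and its defaults are not part of the PanDA server):

import io
from wsgiref.util import setup_testing_defaults

def call_wsgi(app, script_name, body=b''):
    # build a baseline CGI-style environ and point it at the target method
    environ = {}
    setup_testing_defaults(environ)
    environ['SCRIPT_NAME'] = script_name
    environ['CONTENT_LENGTH'] = str(len(body))
    environ['wsgi.input'] = io.BytesIO(body)
    captured = {}
    def start_response(status, headers):
        captured['status'] = status
        captured['headers'] = headers
    # the app returns an iterable of byte chunks
    chunks = app(environ, start_response)
    return captured['status'], b''.join(chunks)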