def uploadLog(req, file):
    if not Protocol.isSecure(req):
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    tmpLog = LogWrapper(_logger, 'uploadLog <{0}>'.format(file.filename))
    tmpLog.debug("start {0}".format(req.subprocess_env['SSL_CLIENT_S_DN']))
    # size check
    sizeLimit = 100 * 1024 * 1024
    # get file size
    contentLength = 0
    try:
        contentLength = int(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            tmpLog.error("cannot get CL : %s" % req.headers_in["content-length"])
        else:
            tmpLog.error("no CL")
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "failed to upload log due to size limit"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    jediLogDir = '/jedilog'
    retStr = ''
    try:
        fileBaseName = file.filename.split('/')[-1]
        fileFullPath = '{0}{1}/{2}'.format(panda_config.cache_dir, jediLogDir, fileBaseName)
        # delete old file
        if os.path.exists(fileFullPath):
            os.remove(fileFullPath)
        # write
        fo = open(fileFullPath, 'wb')
        fileContent = file.file.read()
        fo.write(fileContent)
        fo.close()
        tmpLog.debug("written to {0}".format(fileFullPath))
        retStr = 'http://{0}/cache{1}/{2}'.format(getServerHTTP(None), jediLogDir, fileBaseName)
    except Exception:
        errtype, errvalue = sys.exc_info()[:2]
        errStr = "failed to write log with {0}:{1}".format(errtype.__name__, errvalue)
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    tmpLog.debug("end")
    return retStr
if re.search('python', line) is None: continue # PID pid = items[1] # start time timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)', line) startTime = datetime.datetime( *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6]) # kill old process if startTime < timeLimit: tmpLog.debug("old process : %s %s" % (pid, startTime)) tmpLog.debug(line) commands_get_status_output('kill -9 %s' % pid) except Exception: type, value, traceBack = sys.exc_info() tmpLog.error("kill process : %s %s" % (type, value)) # instantiate TB taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1) # instantiate sitemapper aSiteMapper = SiteMapper(taskBuffer) # delete tmpLog.debug("Del session") status, retSel = taskBuffer.querySQLS( "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {}) if retSel is not None: try: maxID = retSel[0][0] tmpLog.debug("maxID : %s" % maxID)
'warning') elif tmpNumTotal < maxNumRun * 0.9 and ( prodUserName, workingGroup) in throttledUsers: # throttle user tmpNumJobs = taskBuffer.unThrottleUserJobs( prodUserName, workingGroup) if tmpNumJobs is not None and tmpNumJobs > 0: msg = 'released jobs for user="******" group={1} since number of running jobs is less than {2}'.format( prodUserName, workingGroup, maxNumRun) tmpLog.debug(msg) tmpLog.sendMsg(msg, panda_config.loggername, 'userCap') except Exception as e: errStr = "cap failed for %s : %s" % (prodUserName, str(e)) errStr.strip() errStr += traceback.format_exc() tmpLog.error(errStr) # global average tmpLog.debug("=== boost jobs") globalAverageRunDone = float(totalRunDone) / float(totalUsers) tmpLog.debug("global average : %s" % globalAverageRunDone) # count the number of users and run/done jobs for each site siteRunDone = {} siteUsers = {} for computingSite in usageBreakDownPerSite: userValMap = usageBreakDownPerSite[computingSite] for prodUserName in userValMap: wgValMap = userValMap[prodUserName] for workingGroup in wgValMap:
class AdderGen(object): # constructor def __init__(self, taskBuffer, jobID, jobStatus, attemptNr, ignoreTmpError=True, siteMapper=None, pid=None, prelock_pid=None, lock_offset=10): self.job = None self.jobID = jobID self.jobStatus = jobStatus self.taskBuffer = taskBuffer self.ignoreTmpError = ignoreTmpError self.lock_offset = lock_offset self.siteMapper = siteMapper self.datasetMap = {} self.extraInfo = { 'surl': {}, 'nevents': {}, 'lbnr': {}, 'endpoint': {}, 'guid': {} } self.attemptNr = attemptNr self.pid = pid self.prelock_pid = prelock_pid self.data = None # logger self.logger = LogWrapper(_logger, str(self.jobID)) # dump file report def dumpFileReport(self, fileCatalog, attemptNr): self.logger.debug("dump file report") # dump Catalog into file # if attemptNr is None: # xmlFile = '%s/%s_%s_%s' % (panda_config.logdir,self.jobID,self.jobStatus, # str(uuid.uuid4())) # else: # xmlFile = '%s/%s_%s_%s_%s' % (panda_config.logdir,self.jobID,self.jobStatus, # str(uuid.uuid4()),attemptNr) # file = open(xmlFile,'w') # file.write(fileCatalog) # file.close() # dump Catalog into job output report table attempt_nr = 0 if attemptNr is None else attemptNr if self.job is None: self.job = self.taskBuffer.peekJobs([self.jobID], fromDefined=False, fromWaiting=False, forAnal=True)[0] if self.job: self.taskBuffer.insertJobOutputReport( panda_id=self.jobID, prod_source_label=self.job.prodSourceLabel, job_status=self.jobStatus, attempt_nr=attempt_nr, data=fileCatalog) # get plugin class def getPluginClass(self, tmpVO, tmpGroup): # instantiate concrete plugin adderPluginClass = panda_config.getPlugin('adder_plugins', tmpVO, tmpGroup) if adderPluginClass is None: # use ATLAS plugin by default from pandaserver.dataservice.AdderAtlasPlugin import AdderAtlasPlugin adderPluginClass = AdderAtlasPlugin self.logger.debug('plugin name {0}'.format(adderPluginClass.__name__)) return adderPluginClass # main def run(self): try: self.logger.debug("new start: %s attemptNr=%s" % (self.jobStatus, self.attemptNr)) # got lock, get the report report_dict = self.taskBuffer.getJobOutputReport( panda_id=self.jobID, attempt_nr=self.attemptNr) self.data = report_dict.get('data') # query job self.job = self.taskBuffer.peekJobs([self.jobID], fromDefined=False, fromWaiting=False, forAnal=True)[0] # check if job has finished if self.job is None: self.logger.debug(': job not found in DB') elif self.job.jobStatus in [ 'finished', 'failed', 'unknown', 'merging' ]: self.logger.error(': invalid state -> %s' % self.job.jobStatus) elif self.attemptNr is not None and self.job.attemptNr != self.attemptNr: self.logger.error('wrong attemptNr -> job=%s <> %s' % (self.job.attemptNr, self.attemptNr)) # elif self.attemptNr is not None and self.job.jobStatus == 'transferring': # errMsg = 'XML with attemptNr for {0}'.format(self.job.jobStatus) # self.logger.error(errMsg) elif self.jobStatus == EventServiceUtils.esRegStatus: # instantiate concrete plugin adderPluginClass = self.getPluginClass(self.job.VO, self.job.cloud) adderPlugin = adderPluginClass(self.job, taskBuffer=self.taskBuffer, siteMapper=self.siteMapper, logger=self.logger) # execute self.logger.debug('plugin is ready for ES file registration') adderPlugin.registerEventServiceFiles() else: # check file status in JEDI if not self.job.isCancelled() and self.job.taskBufferErrorCode not in \ [pandaserver.taskbuffer.ErrorCode.EC_PilotRetried]: fileCheckInJEDI = self.taskBuffer.checkInputFileStatusInJEDI( self.job) self.logger.debug("check file status in JEDI : {0}".format( fileCheckInJEDI)) if 
fileCheckInJEDI is None: raise RuntimeError( 'failed to check file status in JEDI') if fileCheckInJEDI is False: # set job status to failed since some file status is wrong in JEDI self.jobStatus = 'failed' self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder errStr = "inconsistent file status between Panda and JEDI. " errStr += "failed to avoid duplicated processing caused by synchronization failure" self.job.ddmErrorDiag = errStr self.logger.debug( "set jobStatus={0} since input is inconsistent between Panda and JEDI" .format(self.jobStatus)) elif self.job.jobSubStatus in ['pilot_closed']: # terminated by the pilot self.logger.debug( "going to closed since terminated by the pilot") retClosed = self.taskBuffer.killJobs([self.jobID], 'pilot', '60', True) if retClosed[0] is True: self.logger.debug("end") # remove Catalog self.taskBuffer.deleteJobOutputReport( panda_id=self.jobID, attempt_nr=self.attemptNr) return # check for cloned jobs if EventServiceUtils.isJobCloningJob(self.job): checkJC = self.taskBuffer.checkClonedJob(self.job) if checkJC is None: raise RuntimeError( 'failed to check the cloned job') # failed to lock semaphore if checkJC['lock'] is False: self.jobStatus = 'failed' self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder self.job.ddmErrorDiag = "failed to lock semaphore for job cloning" self.logger.debug( "set jobStatus={0} since did not get semaphore for job cloning" .format(self.jobStatus)) # use failed for cancelled/closed jobs if self.job.isCancelled(): self.jobStatus = 'failed' # reset error codes to skip retrial module self.job.pilotErrorCode = 0 self.job.exeErrorCode = 0 self.job.ddmErrorCode = 0 # keep old status oldJobStatus = self.job.jobStatus # set job status if self.job.jobStatus not in ['transferring']: self.job.jobStatus = self.jobStatus addResult = None adderPlugin = None # parse XML parseResult = self.parseXML() if parseResult < 2: # interaction with DDM try: # instantiate concrete plugin adderPluginClass = self.getPluginClass( self.job.VO, self.job.cloud) adderPlugin = adderPluginClass( self.job, taskBuffer=self.taskBuffer, siteMapper=self.siteMapper, extraInfo=self.extraInfo, logger=self.logger) # execute self.logger.debug('plugin is ready') adderPlugin.execute() addResult = adderPlugin.result self.logger.debug('plugin done with %s' % (addResult.statusCode)) except Exception: errtype, errvalue = sys.exc_info()[:2] self.logger.error( "failed to execute AdderPlugin for VO={0} with {1}:{2}" .format(self.job.VO, errtype, errvalue)) self.logger.error( "failed to execute AdderPlugin for VO={0} with {1}" .format(self.job.VO, traceback.format_exc())) addResult = None self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder self.job.ddmErrorDiag = "AdderPlugin failure" # ignore temporary errors if self.ignoreTmpError and addResult is not None and addResult.isTemporary( ): self.logger.debug(': ignore %s ' % self.job.ddmErrorDiag) self.logger.debug('escape') # unlock job output report self.taskBuffer.unlockJobOutputReport( panda_id=self.jobID, attempt_nr=self.attemptNr, pid=self.pid, lock_offset=self.lock_offset) return # failed if addResult is None or not addResult.isSucceeded(): self.job.jobStatus = 'failed' # set file status for failed jobs or failed transferring jobs self.logger.debug( "status after plugin call :job.jobStatus=%s jobStatus=%s" % (self.job.jobStatus, self.jobStatus)) if self.job.jobStatus == 'failed' or self.jobStatus == 'failed': # First of all: check if job failed and in this case take first actions 
according to error table source, error_code, error_diag = None, None, None errors = [] if self.job.pilotErrorCode: source = 'pilotErrorCode' error_code = self.job.pilotErrorCode error_diag = self.job.pilotErrorDiag errors.append({ 'source': source, 'error_code': error_code, 'error_diag': error_diag }) if self.job.exeErrorCode: source = 'exeErrorCode' error_code = self.job.exeErrorCode error_diag = self.job.exeErrorDiag errors.append({ 'source': source, 'error_code': error_code, 'error_diag': error_diag }) if self.job.ddmErrorCode: source = 'ddmErrorCode' error_code = self.job.ddmErrorCode error_diag = self.job.ddmErrorDiag errors.append({ 'source': source, 'error_code': error_code, 'error_diag': error_diag }) if self.job.transExitCode: source = 'transExitCode' error_code = self.job.transExitCode error_diag = '' errors.append({ 'source': source, 'error_code': error_code, 'error_diag': error_diag }) # _logger.info("updatejob has source %s, error_code %s and error_diag %s"%(source, error_code, error_diag)) if source and error_code: try: self.logger.debug( "AdderGen.run will call apply_retrial_rules") retryModule.apply_retrial_rules( self.taskBuffer, self.job.PandaID, errors, self.job.attemptNr) self.logger.debug("apply_retrial_rules is back") except Exception as e: self.logger.error( "apply_retrial_rules excepted and needs to be investigated (%s): %s" % (e, traceback.format_exc())) self.job.jobStatus = 'failed' for file in self.job.Files: if file.type in ['output', 'log']: if addResult is not None and file.lfn in addResult.mergingFiles: file.status = 'merging' else: file.status = 'failed' else: # reset errors self.job.jobDispatcherErrorCode = 0 self.job.jobDispatcherErrorDiag = 'NULL' # set status if addResult is not None and addResult.mergingFiles != []: # set status for merging: for file in self.job.Files: if file.lfn in addResult.mergingFiles: file.status = 'merging' self.job.jobStatus = 'merging' # propagate transition to prodDB self.job.stateChangeTime = time.strftime( '%Y-%m-%d %H:%M:%S', time.gmtime()) elif addResult is not None and addResult.transferringFiles != []: # set status for transferring for file in self.job.Files: if file.lfn in addResult.transferringFiles: file.status = 'transferring' self.job.jobStatus = 'transferring' self.job.jobSubStatus = None # propagate transition to prodDB self.job.stateChangeTime = time.strftime( '%Y-%m-%d %H:%M:%S', time.gmtime()) else: self.job.jobStatus = 'finished' # endtime if self.job.endTime == 'NULL': self.job.endTime = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) # output size and # of outputs self.job.nOutputDataFiles = 0 self.job.outputFileBytes = 0 for tmpFile in self.job.Files: if tmpFile.type == 'output': self.job.nOutputDataFiles += 1 try: self.job.outputFileBytes += tmpFile.fsize except Exception: pass # protection maxOutputFileBytes = 99999999999 if self.job.outputFileBytes > maxOutputFileBytes: self.job.outputFileBytes = maxOutputFileBytes # set cancelled state if self.job.commandToPilot == 'tobekilled' and self.job.jobStatus == 'failed': self.job.jobStatus = 'cancelled' # update job if oldJobStatus in ['cancelled', 'closed']: pass else: self.logger.debug("updating DB") retU = self.taskBuffer.updateJobs( [self.job], False, oldJobStatusList=[oldJobStatus], extraInfo=self.extraInfo) self.logger.debug("retU: %s" % retU) # failed if not retU[0]: self.logger.error( 'failed to update DB for pandaid={0}'.format( self.job.PandaID)) # unlock job output report self.taskBuffer.unlockJobOutputReport( panda_id=self.jobID, 
attempt_nr=self.attemptNr, pid=self.pid, lock_offset=self.lock_offset) return try: # updateJobs was successful and it failed a job with taskBufferErrorCode self.logger.debug("AdderGen.run will peek the job") job_tmp = self.taskBuffer.peekJobs( [self.job.PandaID], fromDefined=False, fromArchived=True, fromWaiting=False)[0] self.logger.debug( "status {0}, taskBufferErrorCode {1}, taskBufferErrorDiag {2}" .format(job_tmp.jobStatus, job_tmp.taskBufferErrorCode, job_tmp.taskBufferErrorDiag)) if job_tmp.jobStatus == 'failed' and job_tmp.taskBufferErrorCode: source = 'taskBufferErrorCode' error_code = job_tmp.taskBufferErrorCode error_diag = job_tmp.taskBufferErrorDiag errors = [{ 'source': source, 'error_code': error_code, 'error_diag': error_diag }] self.logger.debug( "AdderGen.run 2 will call apply_retrial_rules") retryModule.apply_retrial_rules( self.taskBuffer, job_tmp.PandaID, errors, job_tmp.attemptNr) self.logger.debug("apply_retrial_rules 2 is back") except IndexError: pass except Exception as e: self.logger.error( "apply_retrial_rules 2 excepted and needs to be investigated (%s): %s" % (e, traceback.format_exc())) # setup for closer if not (EventServiceUtils.isEventServiceJob(self.job) and self.job.isCancelled()): destDBList = [] guidList = [] for file in self.job.Files: # ignore inputs if file.type == 'input': continue # skip pseudo datasets if file.destinationDBlock in ['', None, 'NULL']: continue # start closer for output/log datasets if file.destinationDBlock not in destDBList: destDBList.append(file.destinationDBlock) # collect GUIDs if (self.job.prodSourceLabel=='panda' or (self.job.prodSourceLabel in ['rucio_test'] + JobUtils.list_ptest_prod_sources and \ self.job.processingType in ['pathena','prun','gangarobot-rctest','hammercloud'])) \ and file.type == 'output': # extract base LFN since LFN was changed to full LFN for CMS baseLFN = file.lfn.split('/')[-1] guidList.append({ 'lfn': baseLFN, 'guid': file.GUID, 'type': file.type, 'checksum': file.checksum, 'md5sum': file.md5sum, 'fsize': file.fsize, 'scope': file.scope }) if guidList != []: retG = self.taskBuffer.setGUIDs(guidList) if destDBList != []: # start Closer if adderPlugin is not None and hasattr( adderPlugin, 'datasetMap' ) and adderPlugin.datasetMap != {}: cThr = Closer.Closer( self.taskBuffer, destDBList, self.job, datasetMap=adderPlugin.datasetMap) else: cThr = Closer.Closer(self.taskBuffer, destDBList, self.job) self.logger.debug("start Closer") # cThr.start() # cThr.join() cThr.run() del cThr self.logger.debug("end Closer") # run closer for assocaiate parallel jobs if EventServiceUtils.isJobCloningJob(self.job): assDBlockMap = self.taskBuffer.getDestDBlocksWithSingleConsumer( self.job.jediTaskID, self.job.PandaID, destDBList) for assJobID in assDBlockMap: assDBlocks = assDBlockMap[assJobID] assJob = self.taskBuffer.peekJobs( [assJobID], fromDefined=False, fromArchived=False, fromWaiting=False, forAnal=True)[0] if self.job is None: self.logger.debug( ': associated job PandaID={0} not found in DB' .format(assJobID)) else: cThr = Closer.Closer( self.taskBuffer, assDBlocks, assJob) self.logger.debug( "start Closer for PandaID={0}".format( assJobID)) # cThr.start() # cThr.join() cThr.run() del cThr self.logger.debug( "end Closer for PandaID={0}".format( assJobID)) self.logger.debug("end") # try: # # remove Catalog # os.remove(self.xmlFile) # except Exception: # pass # remove Catalog self.taskBuffer.deleteJobOutputReport(panda_id=self.jobID, attempt_nr=self.attemptNr) del self.data del report_dict except Exception as e: 
errStr = ": {} {}".format(str(e), traceback.format_exc()) self.logger.error(errStr) self.logger.error("except") # unlock job output report self.taskBuffer.unlockJobOutputReport(panda_id=self.jobID, attempt_nr=self.attemptNr, pid=self.pid, lock_offset=self.lock_offset) # parse XML # 0: succeeded, 1: harmless error to exit, 2: fatal error, 3: event service def parseXML(self): # get LFN and GUID # self.logger.debug('XML filename : %s' % self.xmlFile) # no outputs log_out = [f for f in self.job.Files if f.type in ['log', 'output']] if not log_out: self.logger.debug("has no outputs") self.logger.debug("parseXML end") return 0 # get input files inputLFNs = [] for file in self.job.Files: if file.type == 'input': inputLFNs.append(file.lfn) # parse XML lfns = [] guids = [] fsizes = [] md5sums = [] chksums = [] surls = [] fullLfnMap = {} nEventsMap = {} guidMap = dict() try: # root = xml.dom.minidom.parse(self.xmlFile) root = xml.dom.minidom.parseString(self.data) files = root.getElementsByTagName('File') for file in files: # get GUID guid = str(file.getAttribute('ID')) # get PFN and LFN nodes logical = file.getElementsByTagName('logical')[0] lfnNode = logical.getElementsByTagName('lfn')[0] # convert UTF8 to Raw lfn = str(lfnNode.getAttribute('name')) # get metadata fsize = None md5sum = None adler32 = None surl = None fullLFN = None for meta in file.getElementsByTagName('metadata'): # get fsize name = str(meta.getAttribute('att_name')) if name == 'fsize': fsize = long(meta.getAttribute('att_value')) elif name == 'md5sum': md5sum = str(meta.getAttribute('att_value')) # check if re.search("^[a-fA-F0-9]{32}$", md5sum) is None: md5sum = None elif name == 'adler32': adler32 = str(meta.getAttribute('att_value')) elif name == 'surl': surl = str(meta.getAttribute('att_value')) elif name == 'full_lfn': fullLFN = str(meta.getAttribute('att_value')) # endpoints self.extraInfo['endpoint'][lfn] = [] for epNode in file.getElementsByTagName('endpoint'): self.extraInfo['endpoint'][lfn].append( str(epNode.firstChild.data)) # error check if (lfn not in inputLFNs) and (fsize is None or (md5sum is None and adler32 is None)): if EventServiceUtils.isEventServiceMerge(self.job): continue else: raise RuntimeError('fsize/md5sum/adler32/surl=None') # append lfns.append(lfn) guids.append(guid) fsizes.append(fsize) md5sums.append(md5sum) surls.append(surl) if adler32 is not None: # use adler32 if available chksums.append("ad:%s" % adler32) else: chksums.append("md5:%s" % md5sum) if fullLFN is not None: fullLfnMap[lfn] = fullLFN except Exception: # parse json try: import json # with open(self.xmlFile) as tmpF: jsonDict = json.loads(self.data) for lfn in jsonDict: fileData = jsonDict[lfn] lfn = str(lfn) fsize = None md5sum = None adler32 = None surl = None fullLFN = None guid = str(fileData['guid']) if 'fsize' in fileData: fsize = long(fileData['fsize']) if 'md5sum' in fileData: md5sum = str(fileData['md5sum']) # check if re.search("^[a-fA-F0-9]{32}$", md5sum) is None: md5sum = None if 'adler32' in fileData: adler32 = str(fileData['adler32']) if 'surl' in fileData: surl = str(fileData['surl']) if 'full_lfn' in fileData: fullLFN = str(fileData['full_lfn']) # endpoints self.extraInfo['endpoint'][lfn] = [] if 'endpoint' in fileData: self.extraInfo['endpoint'][lfn] = fileData['endpoint'] # error check if (lfn not in inputLFNs) and (fsize is None or (md5sum is None and adler32 is None)): if EventServiceUtils.isEventServiceMerge(self.job): continue else: raise RuntimeError( 'fsize/md5sum/adler32/surl=None') # append lfns.append(lfn) 
guids.append(guid) fsizes.append(fsize) md5sums.append(md5sum) surls.append(surl) if adler32 is not None: # use adler32 if available chksums.append("ad:%s" % adler32) else: chksums.append("md5:%s" % md5sum) if fullLFN is not None: fullLfnMap[lfn] = fullLFN except Exception: # check if file exists # if os.path.exists(self.xmlFile): if True: type, value, traceBack = sys.exc_info() self.logger.error(": %s %s" % (type, value)) # set failed anyway self.job.jobStatus = 'failed' # XML error happens when pilot got killed due to wall-time limit or failures in wrapper if (self.job.pilotErrorCode in [0,'0','NULL']) and \ (self.job.taskBufferErrorCode not in [pandaserver.taskbuffer.ErrorCode.EC_WorkerDone]) and \ (self.job.transExitCode in [0,'0','NULL']): self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder self.job.ddmErrorDiag = "Could not get GUID/LFN/MD5/FSIZE/SURL from pilot XML" return 2 else: # XML was deleted return 1 # parse metadata to get nEvents nEventsFrom = None try: root = xml.dom.minidom.parseString(self.job.metadata) files = root.getElementsByTagName('File') for file in files: # get GUID guid = str(file.getAttribute('ID')) # get PFN and LFN nodes logical = file.getElementsByTagName('logical')[0] lfnNode = logical.getElementsByTagName('lfn')[0] # convert UTF8 to Raw lfn = str(lfnNode.getAttribute('name')) guidMap[lfn] = guid # get metadata nevents = None for meta in file.getElementsByTagName('metadata'): # get fsize name = str(meta.getAttribute('att_name')) if name == 'events': nevents = long(meta.getAttribute('att_value')) nEventsMap[lfn] = nevents break nEventsFrom = "xml" except Exception: pass # parse json try: import json jsonDict = json.loads(self.job.metadata) for jsonFileItem in jsonDict['files']['output']: for jsonSubFileItem in jsonFileItem['subFiles']: lfn = str(jsonSubFileItem['name']) try: nevents = long(jsonSubFileItem['nentries']) nEventsMap[lfn] = nevents except Exception: pass try: guid = str(jsonSubFileItem['file_guid']) guidMap[lfn] = guid except Exception: pass nEventsFrom = "json" except Exception: pass # use nEvents and GUIDs reported by the pilot if no job report if self.job.metadata == 'NULL' and self.jobStatus == 'finished' and self.job.nEvents > 0 \ and self.job.prodSourceLabel in ['managed']: for file in self.job.Files: if file.type == 'output': nEventsMap[file.lfn] = self.job.nEvents for lfn, guid in zip(lfns, guids): guidMap[lfn] = guid nEventsFrom = "pilot" self.logger.debug('nEventsMap=%s' % str(nEventsMap)) self.logger.debug('nEventsFrom=%s' % str(nEventsFrom)) self.logger.debug('guidMap=%s' % str(guidMap)) self.logger.debug('self.job.jobStatus=%s in parseXML' % self.job.jobStatus) self.logger.debug( 'isES=%s isJumbo=%s' % (EventServiceUtils.isEventServiceJob( self.job), EventServiceUtils.isJumboJob(self.job))) # get lumi block number lumiBlockNr = self.job.getLumiBlockNr() # copy files for variable number of outputs tmpStat = self.copyFilesForVariableNumOutputs(lfns) if not tmpStat: self.logger.error( "failed to copy files for variable number of outputs") return 2 # check files fileList = [] for file in self.job.Files: fileList.append(file.lfn) if file.type == 'input': if file.lfn in lfns: if self.job.prodSourceLabel in ['user', 'panda']: # skipped file file.status = 'skipped' elif self.job.prodSourceLabel in [ 'managed', 'test' ] + JobUtils.list_ptest_prod_sources: # failed by pilot file.status = 'failed' elif file.type == 'output' or file.type == 'log': # add only log file for failed jobs if self.jobStatus == 'failed' and file.type != 
'log': file.status = 'failed' continue # set failed if it is missing in XML if file.lfn not in lfns: if (self.job.jobStatus == 'finished' and EventServiceUtils.isEventServiceJob(self.job)) \ or EventServiceUtils.isJumboJob(self.job): # unset file status for ES jobs pass elif file.isAllowedNoOutput(): # allowed not to be produced file.status = 'nooutput' self.logger.debug('set {0} to status={1}'.format( file.lfn, file.status)) else: file.status = 'failed' self.job.jobStatus = 'failed' self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder self.job.ddmErrorDiag = "expected output {0} is missing in pilot XML".format( file.lfn) self.logger.error(self.job.ddmErrorDiag) continue # look for GUID with LFN try: i = lfns.index(file.lfn) file.GUID = guids[i] file.fsize = fsizes[i] file.md5sum = md5sums[i] file.checksum = chksums[i] surl = surls[i] # status file.status = 'ready' # change to full LFN if file.lfn in fullLfnMap: file.lfn = fullLfnMap[file.lfn] # add SURL to extraInfo self.extraInfo['surl'][file.lfn] = surl # add nevents if file.lfn in nEventsMap: self.extraInfo['nevents'][file.lfn] = nEventsMap[ file.lfn] except Exception: # status file.status = 'failed' type, value, traceBack = sys.exc_info() self.logger.error(": %s %s" % (type, value)) # set lumi block number if lumiBlockNr is not None and file.status != 'failed': self.extraInfo['lbnr'][file.lfn] = lumiBlockNr self.extraInfo['guid'] = guidMap # check consistency between XML and filesTable for lfn in lfns: if lfn not in fileList: self.logger.error("%s is not found in filesTable" % lfn) self.job.jobStatus = 'failed' for tmpFile in self.job.Files: tmpFile.status = 'failed' self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder self.job.ddmErrorDiag = "pilot produced {0} inconsistently with jobdef".format( lfn) return 2 # return self.logger.debug("parseXML end") return 0 # copy files for variable number of outputs def copyFilesForVariableNumOutputs(self, lfns): # get original output files origOutputs = {} updateOrig = {} for tmpFile in self.job.Files: if tmpFile.type in ['output', 'log']: origOutputs[tmpFile.lfn] = tmpFile if tmpFile.lfn in lfns: # keep original updateOrig[tmpFile.lfn] = False else: # overwrite original updateOrig[tmpFile.lfn] = True # look for unkown files addedNewFiles = False for newLFN in lfns: if newLFN not in origOutputs: # look for corresponding original output for origLFN in origOutputs: tmpPatt = '^{0}\.*_\d+$'.format(origLFN) if re.search(tmpPatt, newLFN) is not None: # copy file record tmpStat = self.taskBuffer.copyFileRecord( newLFN, origOutputs[origLFN], updateOrig[origLFN]) if not tmpStat: return False addedNewFiles = True # disable further overwriting updateOrig[origLFN] = False break # refresh job info if addedNewFiles: self.job = self.taskBuffer.peekJobs([self.jobID], fromDefined=False, fromWaiting=False, forAnal=True)[0] # return return True
class EventPicker: # constructor def __init__(self, taskBuffer, siteMapper, evpFileName, ignoreError): self.taskBuffer = taskBuffer self.siteMapper = siteMapper self.ignoreError = ignoreError self.evpFileName = evpFileName self.token = datetime.datetime.utcnow().isoformat(' ') # logger self.logger = LogWrapper(_logger, self.token) self.pd2p = DynDataDistributer.DynDataDistributer([], self.taskBuffer, self.siteMapper, token=' ', logger=self.logger) self.userDatasetName = '' self.creationTime = '' self.params = '' self.lockedBy = '' self.evpFile = None self.userTaskName = '' # message buffer self.msgBuffer = [] self.lineLimit = 100 # JEDI self.jediTaskID = None self.prodSourceLabel = None self.job_label = None # main def run(self): try: self.putLog('start %s' % self.evpFileName) # lock evp file self.evpFile = open(self.evpFileName) try: fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) except Exception: # relase self.putLog("cannot lock %s" % self.evpFileName) self.evpFile.close() return True # options runEvtList = [] eventPickDataType = '' eventPickStreamName = '' eventPickDS = [] eventPickAmiTag = '' eventPickNumSites = 1 inputFileList = [] tagDsList = [] tagQuery = '' tagStreamRef = '' skipDaTRI = False runEvtGuidMap = {} ei_api = '' # read evp file for tmpLine in self.evpFile: tmpMatch = re.search('^([^=]+)=(.+)$', tmpLine) # check format if tmpMatch is None: continue tmpItems = tmpMatch.groups() if tmpItems[0] == 'runEvent': # get run and event number tmpRunEvt = tmpItems[1].split(',') if len(tmpRunEvt) == 2: runEvtList.append(tmpRunEvt) elif tmpItems[0] == 'eventPickDataType': # data type eventPickDataType = tmpItems[1] elif tmpItems[0] == 'eventPickStreamName': # stream name eventPickStreamName = tmpItems[1] elif tmpItems[0] == 'eventPickDS': # dataset pattern eventPickDS = tmpItems[1].split(',') elif tmpItems[0] == 'eventPickAmiTag': # AMI tag eventPickAmiTag = tmpItems[1] elif tmpItems[0] == 'eventPickNumSites': # the number of sites where datasets are distributed try: eventPickNumSites = int(tmpItems[1]) except Exception: pass elif tmpItems[0] == 'userName': # user name self.userDN = tmpItems[1] self.putLog("user=%s" % self.userDN) elif tmpItems[0] == 'userTaskName': # user task name self.userTaskName = tmpItems[1] elif tmpItems[0] == 'userDatasetName': # user dataset name self.userDatasetName = tmpItems[1] elif tmpItems[0] == 'lockedBy': # client name self.lockedBy = tmpItems[1] elif tmpItems[0] == 'creationTime': # creation time self.creationTime = tmpItems[1] elif tmpItems[0] == 'params': # parameters self.params = tmpItems[1] elif tmpItems[0] == 'ei_api': # ei api parameter for MC ei_api = tmpItems[1] elif tmpItems[0] == 'inputFileList': # input file list inputFileList = tmpItems[1].split(',') try: inputFileList.remove('') except Exception: pass elif tmpItems[0] == 'tagDS': # TAG dataset tagDsList = tmpItems[1].split(',') elif tmpItems[0] == 'tagQuery': # query for TAG tagQuery = tmpItems[1] elif tmpItems[0] == 'tagStreamRef': # StreamRef for TAG tagStreamRef = tmpItems[1] if not tagStreamRef.endswith('_ref'): tagStreamRef += '_ref' elif tmpItems[0] == 'runEvtGuidMap': # GUIDs try: runEvtGuidMap = eval(tmpItems[1]) except Exception: pass # extract task name if self.userTaskName == '' and self.params != '': try: tmpMatch = re.search('--outDS(=| ) *([^ ]+)', self.params) if tmpMatch is not None: self.userTaskName = tmpMatch.group(2) if not self.userTaskName.endswith('/'): self.userTaskName += '/' except Exception: pass # suppress DaTRI if self.params != '': if 
'--eventPickSkipDaTRI' in self.params: skipDaTRI = True # get compact user name compactDN = self.taskBuffer.cleanUserID(self.userDN) # get jediTaskID self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI( compactDN, self.userTaskName) # get prodSourceLabel self.prodSourceLabel, self.job_label = self.taskBuffer.getProdSourceLabelwithTaskID( self.jediTaskID) # convert run/event list to dataset/file list tmpRet, locationMap, allFiles = self.pd2p.convertEvtRunToDatasets( runEvtList, eventPickDataType, eventPickStreamName, eventPickDS, eventPickAmiTag, self.userDN, runEvtGuidMap, ei_api) if not tmpRet: if 'isFatal' in locationMap and locationMap['isFatal'] is True: self.ignoreError = False self.endWithError( 'Failed to convert the run/event list to a dataset/file list' ) return False # use only files in the list if inputFileList != []: tmpAllFiles = [] for tmpFile in allFiles: if tmpFile['lfn'] in inputFileList: tmpAllFiles.append(tmpFile) allFiles = tmpAllFiles # remove redundant CN from DN tmpDN = self.userDN tmpDN = re.sub('/CN=limited proxy', '', tmpDN) tmpDN = re.sub('(/CN=proxy)+$', '', tmpDN) # make dataset container tmpRet = self.pd2p.registerDatasetContainerWithDatasets( self.userDatasetName, allFiles, locationMap, nSites=eventPickNumSites, owner=tmpDN) if not tmpRet: self.endWithError('Failed to make a dataset container %s' % self.userDatasetName) return False # skip DaTRI if skipDaTRI: # successfully terminated self.putLog("skip DaTRI") # update task self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID) else: # get candidates tmpRet, candidateMaps = self.pd2p.getCandidates( self.userDatasetName, self.prodSourceLabel, self.job_label, checkUsedFile=False, useHidden=True) if not tmpRet: self.endWithError( 'Failed to find candidate for destination') return False # collect all candidates allCandidates = [] for tmpDS in candidateMaps: tmpDsVal = candidateMaps[tmpDS] for tmpCloud in tmpDsVal: tmpCloudVal = tmpDsVal[tmpCloud] for tmpSiteName in tmpCloudVal[0]: if tmpSiteName not in allCandidates: allCandidates.append(tmpSiteName) if allCandidates == []: self.endWithError('No candidate for destination') return False # get list of dataset (container) names if eventPickNumSites > 1: # decompose container to transfer datasets separately tmpRet, tmpOut = self.pd2p.getListDatasetReplicasInContainer( self.userDatasetName) if not tmpRet: self.endWithError('Failed to get replicas in %s' % self.userDatasetName) return False userDatasetNameList = list(tmpOut) else: # transfer container at once userDatasetNameList = [self.userDatasetName] # loop over all datasets sitesUsed = [] for tmpUserDatasetName in userDatasetNameList: # get size of dataset container tmpRet, totalInputSize = rucioAPI.getDatasetSize( tmpUserDatasetName) if not tmpRet: self.endWithError( 'Failed to get the size of {0} with {1}'.format( tmpUserDatasetName, totalInputSize)) return False # run brokerage tmpJob = JobSpec() tmpJob.AtlasRelease = '' self.putLog("run brokerage for %s" % tmpDS) pandaserver.brokerage.broker.schedule( [tmpJob], self.taskBuffer, self.siteMapper, True, allCandidates, True, datasetSize=totalInputSize) if tmpJob.computingSite.startswith('ERROR'): self.endWithError('brokerage failed with %s' % tmpJob.computingSite) return False self.putLog("site -> %s" % tmpJob.computingSite) # send transfer request try: tmpDN = rucioAPI.parse_dn(tmpDN) tmpStatus, userInfo = rucioAPI.finger(tmpDN) if not tmpStatus: raise RuntimeError( 'user info not found for {0} with {1}'.format( tmpDN, userInfo)) tmpDN = 
userInfo['nickname'] tmpSiteSpec = self.siteMapper.getSite( tmpJob.computingSite) scope_input, scope_output = select_scope( tmpSiteSpec, JobUtils.ANALY_PS, JobUtils.ANALY_PS) tmpDQ2ID = tmpSiteSpec.ddm_input[scope_input] tmpMsg = "%s ds=%s site=%s id=%s" % ( 'registerDatasetLocation for DaTRI ', tmpUserDatasetName, tmpDQ2ID, tmpDN) self.putLog(tmpMsg) rucioAPI.registerDatasetLocation( tmpDS, [tmpDQ2ID], lifetime=14, owner=tmpDN, activity="User Subscriptions") self.putLog('OK') except Exception: errType, errValue = sys.exc_info()[:2] tmpStr = 'Failed to send transfer request : %s %s' % ( errType, errValue) tmpStr.strip() tmpStr += traceback.format_exc() self.endWithError(tmpStr) return False # list of sites already used sitesUsed.append(tmpJob.computingSite) self.putLog("used %s sites" % len(sitesUsed)) # set candidates if len(sitesUsed) >= eventPickNumSites: # reset candidates to limit the number of sites allCandidates = sitesUsed sitesUsed = [] else: # remove site allCandidates.remove(tmpJob.computingSite) # send email notification for success tmpMsg = 'A transfer request was successfully sent to Rucio.\n' tmpMsg += 'Your task will get started once transfer is completed.' self.sendEmail(True, tmpMsg) try: # unlock and delete evp file fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_UN) self.evpFile.close() os.remove(self.evpFileName) except Exception: pass # successfully terminated self.putLog("end %s" % self.evpFileName) return True except Exception: errType, errValue = sys.exc_info()[:2] self.endWithError('Got exception %s:%s %s' % (errType, errValue, traceback.format_exc())) return False # end with error def endWithError(self, message): self.putLog(message, 'error') # unlock evp file try: fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_UN) self.evpFile.close() if not self.ignoreError: # remove evp file os.remove(self.evpFileName) # send email notification self.sendEmail(False, message) except Exception: pass # upload log if self.jediTaskID is not None: outLog = self.uploadLog() self.taskBuffer.updateTaskErrorDialogJEDI( self.jediTaskID, 'event picking failed. 
' + outLog) # update task if not self.ignoreError: self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID, 'tobroken') self.putLog(outLog) self.putLog('end %s' % self.evpFileName) # put log def putLog(self, msg, type='debug'): tmpMsg = msg if type == 'error': self.logger.error(tmpMsg) else: self.logger.debug(tmpMsg) # send email notification def sendEmail(self, isSucceeded, message): # mail address toAdder = Notifier(self.taskBuffer, None, []).getEmail(self.userDN) if toAdder == '': self.putLog('cannot find email address for %s' % self.userDN, 'error') return # subject mailSubject = "PANDA notification for Event-Picking Request" # message mailBody = "Hello,\n\nHere is your request status for event picking\n\n" if isSucceeded: mailBody += "Status : Passed to Rucio\n" else: mailBody += "Status : Failed\n" mailBody += "Created : %s\n" % self.creationTime mailBody += "Ended : %s\n" % datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') mailBody += "Dataset : %s\n" % self.userDatasetName mailBody += "\n" mailBody += "Parameters : %s %s\n" % (self.lockedBy, self.params) mailBody += "\n" mailBody += "%s\n" % message # send retVal = MailUtils().send(toAdder, mailSubject, mailBody) # return return # upload log def uploadLog(self): if self.jediTaskID is None: return 'cannot find jediTaskID' strMsg = self.logger.dumpToString() s, o = Client.uploadLog(strMsg, self.jediTaskID) if s != 0: return "failed to upload log with {0}.".format(s) if o.startswith('http'): return '<a href="{0}">log</a>'.format(o) return o
def putFile(req, file):
    tmpLog = LogWrapper(_logger, 'putFile-{}'.format(datetime.datetime.utcnow().isoformat('/')))
    if not Protocol.isSecure(req):
        tmpLog.error('No SSL_CLIENT_S_DN')
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    # user name
    username = CoreUtils.clean_user_id(req.subprocess_env['SSL_CLIENT_S_DN'])
    tmpLog.debug("start %s %s" % (username, file.filename))
    # size check
    fullSizeLimit = 768 * 1024 * 1024
    if not file.filename.startswith('sources.'):
        noBuild = True
        sizeLimit = 100 * 1024 * 1024
    else:
        noBuild = False
        sizeLimit = fullSizeLimit
    # get file size
    contentLength = 0
    try:
        contentLength = int(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            tmpLog.error("cannot get CL : %s" % req.headers_in["content-length"])
        else:
            tmpLog.error("no CL")
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (contentLength, sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    try:
        fileName = file.filename.split('/')[-1]
        fileFullPath = '%s/%s' % (panda_config.cache_dir, fileName)
        # avoid overwriting
        if os.path.exists(fileFullPath):
            # touch
            os.utime(fileFullPath, None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            tmpLog.debug('cannot overwrite file %s' % fileName)
            tmpLog.debug("end")
            return errStr
        # write
        fo = open(fileFullPath, 'wb')
        fileContent = file.file.read()
        if hasattr(panda_config, 'compress_file_names') and \
                [True for patt in panda_config.compress_file_names.split(',') if re.search(patt, fileName) is not None]:
            fileContent = gzip.compress(fileContent)
        fo.write(fileContent)
        fo.close()
    except Exception:
        errStr = "ERROR : Cannot write file"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    # checksum
    try:
        # decode Footer
        footer = fileContent[-8:]
        checkSum, isize = struct.unpack("II", footer)
        tmpLog.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # calculate on the fly
        """
        import zlib
        checkSum = zlib.adler32(fileContent) & 0xFFFFFFFF
        """
        # use None to avoid delay for now
        checkSum = None
        tmpLog.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    tmpLog.debug("written dn=%s file=%s size=%s crc=%s" %
                 (username, fileFullPath, fileSize, checkSum))
    # put file info to DB
    if panda_config.record_sandbox_info:
        to_insert = True
        for patt in IGNORED_SUFFIX:
            if file.filename.endswith(patt):
                to_insert = False
                break
        if not to_insert:
            tmpLog.debug("skipped to insert to DB")
        else:
            statClient, outClient = Client.insertSandboxFileInfo(username, file.filename,
                                                                 fileSize, checkSum)
            if statClient != 0 or outClient.startswith("ERROR"):
                tmpLog.error("failed to put sandbox to DB with %s %s" % (statClient, outClient))
                # _logger.debug("putFile : end")
                # return "ERROR : Cannot insert sandbox to DB"
            else:
                tmpLog.debug("inserted sandbox to DB with %s" % outClient)
    tmpLog.debug("end")
    return True
def checkProxy(self, user_dn, production=False, role=None, name=None):
    """Check the validity of a proxy."""
    log_stream = LogWrapper(_logger, '< name="{}" role={} >'.format(name, role))
    log_stream.info('check proxy for {}'.format(user_dn))
    if role is not None:
        tmpExtension = self.getExtension(role)
        proxy_path = os.path.join(
            self.__target_path,
            str(hashlib.sha1(six.b(user_dn + tmpExtension)).hexdigest()))
    elif production:
        proxy_path = os.path.join(
            self.__target_path,
            str(hashlib.sha1(six.b(user_dn + '.prod')).hexdigest()))
    else:
        proxy_path = os.path.join(self.__target_path,
                                  hashlib.sha1(six.b(user_dn)).hexdigest())
    isOK = False
    if os.path.isfile(proxy_path):
        log_stream.info('proxy is there. Need to check validity')
        cmd = "voms-proxy-info -exists -hours 94 -file %s" % proxy_path
        stdout, stderr, status = execute(cmd, log_stream)
        if stdout:
            log_stream.info('stdout is %s ' % stdout)
        if stderr:
            log_stream.info('stderr is %s ' % stderr)
        if status == 1:
            log_stream.info('proxy expires in 94h or less. We need to renew proxy!')
            ret = self.store(user_dn, self.__cred_name, production,
                             role=role, log_stream=log_stream)
            if ret == 0:
                log_stream.info('proxy retrieval successful')
                isOK = True
            elif ret == 2:
                log_stream.info('proxy retrieval on hold')
            else:
                log_stream.error('proxy retrieval failed')
        else:
            log_stream.info('proxy is valid for more than 3 days')
            isOK = True
    else:
        log_stream.info('proxy is not in the cache repo. will try to get it from myproxy')
        ret = self.store(user_dn, self.__cred_name, production,
                         role=role, log_stream=log_stream)
        if ret == 0:
            log_stream.info('proxy stored successfully')
            isOK = True
        elif ret == 2:
            log_stream.info('proxy retrieval on hold')
        else:
            log_stream.error('proxy retrieval failed')
    if isOK:
        plain_path = os.path.join(
            self.__target_path,
            hashlib.sha1(six.b(user_dn + '.plain')).hexdigest())
        if os.path.isfile(plain_path):
            return self.checkValidity(plain_path, log_stream)
        else:
            log_stream.error('plain proxy not there at the moment!')
def run(self): try: # make a message instance tmpLog = LogWrapper(_logger, None) # run main procedure in the same process if not self.forkRun: tmpLog.debug('main start') tmpLog.debug('firstSubmission={0}'.format( self.firstSubmission)) # group jobs per VO voJobsMap = {} ddmFreeJobs = [] tmpLog.debug('{0} jobs in total'.format(len(self.jobs))) for tmpJob in self.jobs: # set VO=local for DDM free if tmpJob.destinationSE == 'local': tmpVO = 'local' else: tmpVO = tmpJob.VO # make map voJobsMap.setdefault(tmpVO, []) voJobsMap[tmpVO].append(tmpJob) # loop over all VOs for tmpVO in voJobsMap: tmpJobList = voJobsMap[tmpVO] tmpLog.debug('vo={0} has {1} jobs'.format( tmpVO, len(tmpJobList))) # get plugin setupperPluginClass = panda_config.getPlugin( 'setupper_plugins', tmpVO) if setupperPluginClass is None: # use ATLAS plug-in by default from pandaserver.dataservice.SetupperAtlasPlugin import SetupperAtlasPlugin setupperPluginClass = SetupperAtlasPlugin tmpLog.debug('plugin name -> {0}'.format( setupperPluginClass.__name__)) try: # make plugin setupperPlugin = setupperPluginClass( self.taskBuffer, self.jobs, tmpLog, resubmit=self.resubmit, pandaDDM=self.pandaDDM, ddmAttempt=self.ddmAttempt, onlyTA=self.onlyTA, firstSubmission=self.firstSubmission) # run plugin tmpLog.debug('run plugin') setupperPlugin.run() # go forward if not TA if not self.onlyTA: # update jobs tmpLog.debug('update jobs') self.updateJobs( setupperPlugin.jobs + setupperPlugin.jumboJobs, tmpLog) # execute post process tmpLog.debug('post execute plugin') setupperPlugin.postRun() tmpLog.debug('done plugin') except Exception: errtype, errvalue = sys.exc_info()[:2] tmpLog.error('plugin failed with {0}:{1}'.format( errtype, errvalue)) tmpLog.debug('main end') else: tmpLog.debug('fork start') # write jobs to file import os try: import cPickle as pickle except ImportError: import pickle outFileName = '%s/set.%s_%s' % (panda_config.logdir, self.jobs[0].PandaID, str(uuid.uuid4())) outFile = open(outFileName, 'wb') pickle.dump(self.jobs, outFile, protocol=0) outFile.close() # run main procedure in another process because python doesn't release memory com = 'cd %s > /dev/null 2>&1; export HOME=%s; ' % ( panda_config.home_dir_cwd, panda_config.home_dir_cwd) com += 'env PYTHONPATH=%s:%s %s/python -Wignore %s/dataservice/forkSetupper.py -i %s' % \ (panda_config.pandaCommon_dir,panda_config.pandaPython_dir,panda_config.native_python, panda_config.pandaPython_dir,outFileName) if self.onlyTA: com += " -t" if not self.firstSubmission: com += " -f" tmpLog.debug(com) # execute status, output = self.taskBuffer.processLimiter.getstatusoutput( com) tmpLog.debug("return from main process: %s %s" % (status, output)) tmpLog.debug('fork end') except Exception as e: tmpLog.error('master failed with {0} {1}'.format( str(e), traceback.format_exc()))
def application(environ, start_response):
    # get method name
    methodName = ''
    if 'SCRIPT_NAME' in environ:
        methodName = environ['SCRIPT_NAME'].split('/')[-1]
    tmpLog = LogWrapper(_logger, "PID={0} {1}".format(os.getpid(), methodName))
    tmpLog.debug("start")
    regStart = datetime.datetime.utcnow()
    retType = None
    # check method name
    if methodName not in allowedMethods:
        tmpLog.error("is forbidden")
        exeRes = "False : %s is forbidden" % methodName
    else:
        # get method object
        tmpMethod = None
        try:
            tmpMethod = globals()[methodName]
        except Exception:
            pass
        # object not found
        if tmpMethod is None:
            tmpLog.error("is undefined")
            exeRes = "False"
        else:
            try:
                # get params
                tmpPars = cgi.FieldStorage(environ['wsgi.input'], environ=environ,
                                           keep_blank_values=1)
                # convert to map
                params = {}
                for tmpKey in list(tmpPars):
                    if tmpPars[tmpKey].file is not None and tmpPars[tmpKey].filename is not None:
                        # file
                        params[tmpKey] = tmpPars[tmpKey]
                    else:
                        # string
                        params[tmpKey] = tmpPars.getfirst(tmpKey)
                if panda_config.entryVerbose:
                    tmpLog.debug("with %s" % str(list(params)))
                # dummy request object
                dummyReq = DummyReq(environ, tmpLog)
                param_list = [dummyReq]
                # exec
                exeRes = tmpMethod(*param_list, **params)
                # extract return type
                if isinstance(exeRes, dict):
                    retType = exeRes['type']
                    exeRes = exeRes['content']
                # convert bool to string
                if exeRes in [True, False]:
                    exeRes = str(exeRes)
            except Exception as e:
                tmpLog.error("execution failure : {0}".format(str(e)))
                errStr = ""
                for tmpKey in environ:
                    tmpVal = environ[tmpKey]
                    errStr += "%s : %s\n" % (tmpKey, str(tmpVal))
                tmpLog.error(errStr)
                # return internal server error
                start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
                return [str(e)]
    if panda_config.entryVerbose:
        tmpLog.debug("done")
    regTime = datetime.datetime.utcnow() - regStart
    tmpLog.info("exec_time=%s.%03d sec, return len=%s B" %
                (regTime.seconds, regTime.microseconds / 1000, len(str(exeRes))))
    # return
    if exeRes == pandaserver.taskbuffer.ErrorCode.EC_NotFound:
        start_response('404 Not Found', [('Content-Type', 'text/plain')])
        return ['not found']
    elif isinstance(exeRes, pandaserver.taskbuffer.ErrorCode.EC_Redirect):
        start_response('302 Redirect', [('Location', exeRes.url)])
        return ['redirect']
    else:
        if retType == 'json':
            start_response('200 OK', [('Content-Type', 'application/json')])
        else:
            start_response('200 OK', [('Content-Type', 'text/plain')])
        if isinstance(exeRes, str):
            exeRes = exeRes.encode()
        return [exeRes]
def process(self, file_name, to_delete=False, test_mode=False, get_log=False, dump_workflow=False):
    try:
        is_fatal = False
        is_OK = True
        request_id = None
        dump_str = None
        with open(file_name) as f:
            ops = json.load(f)
        user_name = clean_user_id(ops["userName"])
        base_platform = ops['data'].get('base_platform')
        for task_type in ops['data']['taskParams']:
            ops['data']['taskParams'][task_type]['userName'] = user_name
            if base_platform:
                ops['data']['taskParams'][task_type]['basePlatform'] = base_platform
        log_token = '< id="{}" test={} outDS={} >'.format(user_name, test_mode, ops['data']['outDS'])
        tmpLog = LogWrapper(self.log, log_token)
        tmpLog.info('start {}'.format(file_name))
        sandbox_url = os.path.join(ops['data']['sourceURL'], 'cache', ops['data']['sandbox'])
        # IO through json files
        ops_file = tempfile.NamedTemporaryFile(delete=False, mode='w')
        json.dump(ops, ops_file)
        ops_file.close()
        # execute main in another process to avoid chdir mess
        tmp_stat, tmp_out = commands_get_status_output("python {} {} '{}' {} {} '{}' {}".format(
            __file__, sandbox_url, log_token, dump_workflow, ops_file.name, user_name, test_mode))
        if tmp_stat:
            is_OK = False
            tmpLog.error('main execution failed with {}:{}'.format(tmp_stat, tmp_out))
        else:
            with open(tmp_out.split('\n')[-1]) as tmp_out_file:
                is_OK, is_fatal, request_id, dump_str = json.load(tmp_out_file)
            try:
                os.remove(tmp_out)
            except Exception:
                pass
        if not get_log:
            if is_OK:
                tmpLog.info('is_OK={} request_id={}'.format(is_OK, request_id))
            else:
                tmpLog.info('is_OK={} is_fatal={} request_id={}'.format(is_OK, is_fatal, request_id))
        if to_delete or (not test_mode and (is_OK or is_fatal)):
            dump_str = tmpLog.dumpToString() + dump_str
            tmpLog.debug('delete {}'.format(file_name))
            try:
                os.remove(file_name)
            except Exception:
                pass
            # send notification
            if not test_mode and self.taskBuffer is not None:
                toAdder = self.taskBuffer.getEmailAddr(user_name)
                if toAdder is None or toAdder.startswith('notsend'):
                    tmpLog.debug('skip to send notification since suppressed')
                else:
                    # message
                    if is_OK:
                        mailSubject = "PANDA Notification for Workflow {}".format(ops['data']['outDS'])
                        mailBody = "Hello,\n\nWorkflow:{} has been accepted with RequestID:{}\n\n".\
                            format(ops['data']['outDS'], request_id)
                    else:
                        mailSubject = "PANDA WARNING for Workflow={}".format(ops['data']['outDS'])
                        mailBody = "Hello,\n\nWorkflow {} was not accepted\n\n".\
                            format(ops['data']['outDS'], request_id)
                        mailBody += "Reason : %s\n" % dump_str
                    # send
                    tmpSM = MailUtils().send(toAdder, mailSubject, mailBody)
                    tmpLog.debug('sent message with {}'.format(tmpSM))
    except Exception as e:
        is_OK = False
        tmpLog.error("failed to run with {} {}".format(str(e), traceback.format_exc()))
    if get_log:
        ret_val = {'status': is_OK}
        if is_OK:
            ret_val['log'] = dump_str
        else:
            if dump_str is None:
                ret_val['log'] = tmpLog.dumpToString()
            else:
                ret_val['log'] = dump_str
        return ret_val
def core_exec(sandbox_url, log_token, dump_workflow, ops_file, user_name, test_mode):
    tmpLog = LogWrapper(_logger, log_token)
    is_OK = True
    is_fatal = False
    request_id = None
    if dump_workflow == 'True':
        dump_workflow = True
    else:
        dump_workflow = False
    if test_mode == 'True':
        test_mode = True
    else:
        test_mode = False
    try:
        with open(ops_file) as f:
            ops = json.load(f)
        try:
            os.remove(ops_file)
        except Exception:
            pass
        # go to temp dir
        cur_dir = os.getcwd()
        with tempfile.TemporaryDirectory() as tmp_dirname:
            os.chdir(tmp_dirname)
            # download sandbox
            tmpLog.info('downloading sandbox from {}'.format(sandbox_url))
            with requests.get(sandbox_url, allow_redirects=True, verify=False, stream=True) as r:
                if r.status_code == 400:
                    tmpLog.error("not found")
                    is_fatal = True
                    is_OK = False
                elif r.status_code != 200:
                    tmpLog.error("bad HTTP response {}".format(r.status_code))
                    is_OK = False
                # extract sandbox
                if is_OK:
                    with open(ops['data']['sandbox'], 'wb') as fs:
                        for chunk in r.raw.stream(1024, decode_content=False):
                            if chunk:
                                fs.write(chunk)
                        fs.close()
                    tmp_stat, tmp_out = commands_get_status_output(
                        'tar xvfz {}'.format(ops['data']['sandbox']))
                    if tmp_stat != 0:
                        tmpLog.error(tmp_out)
                        dump_str = 'failed to extract {}'.format(ops['data']['sandbox'])
                        tmpLog.error(dump_str)
                        is_fatal = True
                        is_OK = False
            # parse workflow files
            if is_OK:
                tmpLog.info('parse workflow')
                if ops['data']['language'] == 'cwl':
                    nodes, root_in = pcwl_utils.parse_workflow_file(ops['data']['workflowSpecFile'], tmpLog)
                    with open(ops['data']['workflowInputFile']) as workflow_input:
                        data = yaml.safe_load(workflow_input)
                    s_id, t_nodes, nodes = pcwl_utils.resolve_nodes(nodes, root_in, data, 0, set(),
                                                                    ops['data']['outDS'], tmpLog)
                    workflow_utils.set_workflow_outputs(nodes)
                    id_node_map = workflow_utils.get_node_id_map(nodes)
                    [node.resolve_params(ops['data']['taskParams'], id_node_map) for node in nodes]
                    dump_str = "the description was internally converted as follows\n" \
                        + workflow_utils.dump_nodes(nodes)
                    tmpLog.info(dump_str)
                    for node in nodes:
                        s_check, o_check = node.verify()
                        tmp_str = 'Verification failure in ID:{} {}'.format(node.id, o_check)
                        if not s_check:
                            tmpLog.error(tmp_str)
                            dump_str += tmp_str
                            dump_str += '\n'
                            is_fatal = True
                            is_OK = False
                else:
                    dump_str = "{} is not supported to describe the workflow".format(ops['data']['language'])
                    tmpLog.error(dump_str)
                    is_fatal = True
                    is_OK = False
                # convert to workflow
                if is_OK:
                    workflow_to_submit, dump_str_list = workflow_utils.convert_nodes_to_workflow(nodes)
                    try:
                        if workflow_to_submit:
                            if not test_mode:
                                tmpLog.info('submit workflow')
                                wm = ClientManager(host=get_rest_host())
                                request_id = wm.submit(workflow_to_submit, username=user_name)
                        else:
                            dump_str = 'workflow is empty'
                            tmpLog.error(dump_str)
                            is_fatal = True
                            is_OK = False
                    except Exception as e:
                        dump_str = 'failed to submit the workflow with {}'.format(str(e))
                        tmpLog.error('{} {}'.format(dump_str, traceback.format_exc()))
                    if dump_workflow:
                        tmpLog.debug('\n' + ''.join(dump_str_list))
            os.chdir(cur_dir)
    except Exception as e:
        is_OK = False
        is_fatal = True
        tmpLog.error("failed to run with {} {}".format(str(e), traceback.format_exc()))
    # report the result to the parent process via a temporary json file
    with tempfile.NamedTemporaryFile(delete=False, mode='w') as tmp_json:
        json.dump([is_OK, is_fatal, request_id, tmpLog.dumpToString()], tmp_json)
    print(tmp_json.name)
    sys.exit(0)
def main(argv=tuple(), tbuf=None, **kwargs):
    # py2/py3 compatibility for long
    try:
        long
    except NameError:
        long = int
    tmpLog = LogWrapper(_logger, None)
    tmpLog.debug("===================== start =====================")
    # current minute
    currentMinute = datetime.datetime.utcnow().minute
    # instantiate TB
    if tbuf is None:
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
    else:
        taskBuffer = tbuf
    # instantiate sitemapper
    aSiteMapper = SiteMapper(taskBuffer)
    # delete
    tmpLog.debug("Del session")
    status, retSel = taskBuffer.querySQLS(
        "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {})
    if retSel is not None:
        try:
            maxID = retSel[0][0]
            tmpLog.debug("maxID : %s" % maxID)
            if maxID is not None:
                varMap = {}
                varMap[':maxID'] = maxID
                varMap[':jobStatus1'] = 'activated'
                varMap[':jobStatus2'] = 'waiting'
                varMap[':jobStatus3'] = 'failed'
                varMap[':jobStatus4'] = 'cancelled'
                status, retDel = taskBuffer.querySQLS(
                    "DELETE FROM ATLAS_PANDA.jobsDefined4 WHERE PandaID<:maxID AND jobStatus IN (:jobStatus1,:jobStatus2,:jobStatus3,:jobStatus4)",
                    varMap)
        except Exception:
            pass

    # count # of getJob/updateJob in dispatcher's log
    try:
        # don't update when logrotate is running
        timeNow = datetime.datetime.utcnow()
        logRotateTime = timeNow.replace(hour=3, minute=2, second=0, microsecond=0)
        if (timeNow > logRotateTime and (timeNow - logRotateTime) < datetime.timedelta(minutes=5)) or \
                (logRotateTime > timeNow and (logRotateTime - timeNow) < datetime.timedelta(minutes=5)):
            tmpLog.debug("skip pilotCounts session for logrotate")
        else:
            # log filename
            dispLogName = '%s/panda-PilotRequests.log' % panda_config.logdir
            # time limit
            timeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=3)
            timeLimitS = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
            # check if tgz is required
            com = 'head -1 %s' % dispLogName
            lostat, loout = commands_get_status_output(com)
            useLogTgz = True
            if lostat == 0:
                match = re.search('^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', loout)
                if match is not None:
                    startTime = datetime.datetime(*time.strptime(match.group(0), '%Y-%m-%d %H:%M:%S')[:6])
                    # current log contains all info
                    if startTime < timeLimit:
                        useLogTgz = False
            # log files
            dispLogNameList = [dispLogName]
            if useLogTgz:
                today = datetime.date.today()
                dispLogNameList.append('{0}-{1}.gz'.format(dispLogName, today.strftime('%Y%m%d')))
            # delete tmp
            commands_get_status_output('rm -f %s.tmp-*' % dispLogName)
            # tmp name
            tmpLogName = '%s.tmp-%s' % (dispLogName,
                                        datetime.datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S'))
            # loop over all files
            pilotCounts = {}
            pilotCountsS = {}
            for tmpDispLogName in dispLogNameList:
                # expand or copy
                if tmpDispLogName.endswith('.gz'):
                    com = 'gunzip -c %s > %s' % (tmpDispLogName, tmpLogName)
                else:
                    com = 'cp %s %s' % (tmpDispLogName, tmpLogName)
                lostat, loout = commands_get_status_output(com)
                if lostat != 0:
                    errMsg = 'failed to expand/copy %s with : %s' % (tmpDispLogName, loout)
                    raise RuntimeError(errMsg)
                # search string
                sStr = '^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*'
                sStr += 'method=(.+),site=(.+),node=(.+),type=(.+)'
                # read
                logFH = open(tmpLogName)
                for line in logFH:
                    # check format
                    match = re.search(sStr, line)
                    if match is not None:
                        # check timerange
                        timeStamp = datetime.datetime(*time.strptime(match.group(1), '%Y-%m-%d %H:%M:%S')[:6])
                        if timeStamp < timeLimit:
                            continue
                        tmpMethod = match.group(2)
                        tmpSite = match.group(3)
                        tmpNode = match.group(4)
                        tmpType = match.group(5)
                        # protection against corrupted entries from pilot,
                        # e.g. pilot reading site json from cvmfs while it was being updated
                        if tmpSite not in aSiteMapper.siteSpecList:
                            continue
                        # sum
                        pilotCounts.setdefault(tmpSite, {})
                        pilotCounts[tmpSite].setdefault(tmpMethod, {})
                        pilotCounts[tmpSite][tmpMethod].setdefault(tmpNode, 0)
                        pilotCounts[tmpSite][tmpMethod][tmpNode] += 1
                        # short
                        if timeStamp > timeLimitS:
                            if tmpSite not in pilotCountsS:
                                pilotCountsS[tmpSite] = dict()
                            if tmpMethod not in pilotCountsS[tmpSite]:
                                pilotCountsS[tmpSite][tmpMethod] = dict()
                            if tmpNode not in pilotCountsS[tmpSite][tmpMethod]:
                                pilotCountsS[tmpSite][tmpMethod][tmpNode] = 0
                            pilotCountsS[tmpSite][tmpMethod][tmpNode] += 1
                # close
                logFH.close()
            # delete tmp
            commands_get_status_output('rm %s' % tmpLogName)
            # update
            hostID = panda_config.pserverhost.split('.')[0]
            tmpLog.debug("pilotCounts session")
            retPC = taskBuffer.updateSiteData(hostID, pilotCounts, interval=3)
            tmpLog.debug(retPC)
            retPC = taskBuffer.updateSiteData(hostID, pilotCountsS, interval=1)
            tmpLog.debug(retPC)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("updateJob/getJob : %s %s" % (errType, errValue))

    # nRunning
    tmpLog.debug("nRunning session")
    try:
        if (currentMinute / panda_config.nrun_interval) % panda_config.nrun_hosts == panda_config.nrun_snum:
            retNR = taskBuffer.insertnRunningInSiteData()
            tmpLog.debug(retNR)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("nRunning : %s %s" % (errType, errValue))

    # session for co-jumbo jobs
    tmpLog.debug("co-jumbo session")
    try:
        ret = taskBuffer.getCoJumboJobsToBeFinished(30, 0, 1000)
        if ret is None:
            tmpLog.debug("failed to get co-jumbo jobs to finish")
        else:
            coJumboA, coJumboD, coJumboW, coJumboTokill = ret
            tmpLog.debug("finish {0} co-jumbo jobs in Active".format(len(coJumboA)))
            if len(coJumboA) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboA,
                                               fromDefined=False,
                                               fromActive=True,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False)
            tmpLog.debug("finish {0} co-jumbo jobs in Defined".format(len(coJumboD)))
            if len(coJumboD) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboD,
                                               fromDefined=True,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], True)
            tmpLog.debug("finish {0} co-jumbo jobs in Waiting".format(len(coJumboW)))
            if len(coJumboW) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboW,
                                               fromDefined=False,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=True)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False, True)
            tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(len(coJumboTokill)))
            if len(coJumboTokill) > 0:
                jediJobs = list(coJumboTokill)
                nJob = 100
                iJob = 0
                while iJob < len(jediJobs):
                    tmpLog.debug(' killing %s' % str(jediJobs[iJob:iJob + nJob]))
                    Client.killJobs(jediJobs[iJob:iJob + nJob], 51, keepUnmerged=True)
                    iJob += nJob
    except Exception:
        errStr = traceback.format_exc()
        tmpLog.error(errStr)

    tmpLog.debug("Fork session")

    # thread for fork
    class ForkThr(threading.Thread):
        def __init__(self, fileName):
            threading.Thread.__init__(self)
            self.fileName = fileName

        def run(self):
            if 'VIRTUAL_ENV' in os.environ:
                prefix = os.environ['VIRTUAL_ENV']
            else:
                prefix = ''
            setupStr = 'source {0}/etc/sysconfig/panda_server; '.format(prefix)
            runStr = '%s/python -Wignore ' % panda_config.native_python
            runStr += panda_config.pandaPython_dir + '/dataservice/forkSetupper.py -i '
            runStr += self.fileName
            if self.fileName.split('/')[-1].startswith('set.NULL.'):
                runStr += ' -t'
            comStr = setupStr + runStr
            tmpLog.debug(comStr)
            commands_get_status_output(comStr)

    # get set.* files
    filePatt = panda_config.logdir + '/' + 'set.*'
    fileList = glob.glob(filePatt)

    # the max number of threads
    maxThr = 10
    nThr = 0

    # loop over all files
    forkThrList = []
    timeNow = datetime.datetime.utcnow()
    for tmpName in fileList:
        if not os.path.exists(tmpName):
            continue
        try:
            # takes care of only recent files
            modTime = datetime.datetime(*(time.gmtime(os.path.getmtime(tmpName))[:7]))
            if (timeNow - modTime) > datetime.timedelta(minutes=1) and \
                    (timeNow - modTime) < datetime.timedelta(hours=1):
                cSt, cOut = commands_get_status_output('ps aux | grep fork | grep -v PYTH')
                # if no process is running for the file
                if cSt == 0 and tmpName not in cOut:
                    nThr += 1
                    thr = ForkThr(tmpName)
                    thr.start()
                    forkThrList.append(thr)
                    if nThr > maxThr:
                        break
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            tmpLog.error("%s %s" % (errType, errValue))

    # join fork threads
    for thr in forkThrList:
        thr.join()

    # terminate TaskBuffer IF
    # taskBufferIF.terminate()
    tmpLog.debug("===================== end =====================")
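# ---------------------------------------------------------------------------
# Hedged illustration (not part of the server code above): the pilotCounts
# session in main() matches dispatcher log lines with the regex built from
# sStr and accumulates counts as {site: {method: {node: count}}}. The sample
# line below is fabricated to fit that pattern; real log entries may differ.
import re

sample_line = '2024-01-15 10:23:45 UTC method=getJob,site=EXAMPLE_SITE,node=worker01.example.org,type=production'
sample_pattern = (r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*'
                  r'method=(.+),site=(.+),node=(.+),type=(.+)')
m = re.search(sample_pattern, sample_line)
if m is not None:
    tmpMethod, tmpSite, tmpNode = m.group(2), m.group(3), m.group(4)
    pilotCounts = {}
    pilotCounts.setdefault(tmpSite, {})
    pilotCounts[tmpSite].setdefault(tmpMethod, {})
    pilotCounts[tmpSite][tmpMethod].setdefault(tmpNode, 0)
    pilotCounts[tmpSite][tmpMethod][tmpNode] += 1
    # -> {'EXAMPLE_SITE': {'getJob': {'worker01.example.org': 1}}}
# ---------------------------------------------------------------------------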
def application(environ, start_response):
    # get method name
    methodName = ''
    if 'SCRIPT_NAME' in environ:
        methodName = environ['SCRIPT_NAME'].split('/')[-1]
    tmpLog = LogWrapper(_logger, "PID={0} {1}".format(os.getpid(), methodName), seeMem=True)
    cont_length = int(environ.get('CONTENT_LENGTH', 0))
    json_body = environ.get('CONTENT_TYPE', None) == 'application/json'
    tmpLog.debug("start content-length={} json={}".format(cont_length, json_body))
    regStart = datetime.datetime.utcnow()
    retType = None
    # check method name
    if methodName not in allowedMethods:
        tmpLog.error("is forbidden")
        exeRes = "False : %s is forbidden" % methodName
    else:
        # get method object
        tmpMethod = None
        try:
            tmpMethod = globals()[methodName]
        except Exception:
            pass
        # object not found
        if tmpMethod is None:
            tmpLog.error("is undefined")
            exeRes = "False"
        else:
            body = b''
            try:
                # dummy request object
                dummyReq = DummyReq(environ, tmpLog)
                if not dummyReq.authenticated:
                    start_response('403 Forbidden', [('Content-Type', 'text/plain')])
                    return ["ERROR : Token authentication failed on the server side. {}".format(
                        dummyReq.message).encode()]
                username = dummyReq.subprocess_env.get('SSL_CLIENT_S_DN', None)
                if username:
                    username = CoreUtils.clean_user_id(username)
                    if username in ban_user_list:
                        errMsg = '{} is banned'.format(username)
                        tmpLog.warning(errMsg)
                        start_response('403 Forbidden', [('Content-Type', 'text/plain')])
                        return ["ERROR : {}".format(errMsg).encode()]
                # read contents
                while cont_length > 0:
                    chunk = environ['wsgi.input'].read(min(cont_length, 1024 * 1024))
                    if not chunk:
                        break
                    cont_length -= len(chunk)
                    body += chunk
                if cont_length > 0:
                    raise OSError('partial read from client. {} bytes remaining'.format(cont_length))
                if not json_body:
                    # query string
                    environ['wsgi.input'] = io.BytesIO(body)
                    # get params
                    tmpPars = cgi.FieldStorage(environ['wsgi.input'], environ=environ,
                                               keep_blank_values=1)
                    # convert to map
                    params = {}
                    for tmpKey in list(tmpPars):
                        if tmpPars[tmpKey].file is not None and tmpPars[tmpKey].filename is not None:
                            # file
                            params[tmpKey] = tmpPars[tmpKey]
                        else:
                            # string
                            params[tmpKey] = tmpPars.getfirst(tmpKey)
                else:
                    # json
                    body = gzip.decompress(body)
                    params = json.loads(body)
                if panda_config.entryVerbose:
                    tmpLog.debug("with %s" % str(list(params)))
                param_list = [dummyReq]
                # exec
                exeRes = tmpMethod(*param_list, **params)
                # extract return type
                if isinstance(exeRes, dict):
                    retType = exeRes['type']
                    exeRes = exeRes['content']
                # convert bool to string
                if exeRes in [True, False]:
                    exeRes = str(exeRes)
            except Exception as e:
                tmpLog.error("execution failure : {0}\n {1}".format(str(e), traceback.format_exc()))
                if hasattr(panda_config, 'dumpBadRequest') and panda_config.dumpBadRequest:
                    try:
                        with tempfile.NamedTemporaryFile(delete=False, prefix='req_dump_') as f:
                            environ['WSGI_INPUT_DUMP'] = f.name
                            f.write(body)
                            os.chmod(f.name, 0o775)
                    except Exception:
                        tmpLog.error(traceback.format_exc())
                errStr = ""
                for tmpKey in environ:
                    tmpVal = environ[tmpKey]
                    errStr += "%s : %s\n" % (tmpKey, str(tmpVal))
                tmpLog.error(errStr)
                # return internal server error
                start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
                # force kill to release memory
                if type(e) == OSError:
                    tmpLog.warning('force restart due')
                    os.kill(os.getpid(), signal.SIGINT)
                return [str(e).encode()]
    if panda_config.entryVerbose:
        tmpLog.debug("done")
    regTime = datetime.datetime.utcnow() - regStart
    tmpLog.info("exec_time=%s.%03d sec, return len=%s B" % (regTime.seconds,
                                                            regTime.microseconds / 1000,
                                                            len(str(exeRes))))
    # return
    if exeRes == pandaserver.taskbuffer.ErrorCode.EC_NotFound:
        start_response('404 Not Found', [('Content-Type', 'text/plain')])
        return ['not found'.encode()]
    elif isinstance(exeRes, pandaserver.taskbuffer.ErrorCode.EC_Redirect):
        start_response('302 Redirect', [('Location', exeRes.url)])
        return ['redirect'.encode()]
    else:
        if retType == 'json':
            start_response('200 OK', [('Content-Type', 'application/json')])
        else:
            start_response('200 OK', [('Content-Type', 'text/plain')])
        if isinstance(exeRes, str):
            exeRes = exeRes.encode()
        return [exeRes]
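# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the server code above): application()
# dispatches on the last component of SCRIPT_NAME, so a client reaches a
# server-side method by POSTing to .../server/panda/<methodName>. The host,
# port, method name, parameters, and credential paths below are hypothetical
# placeholders; real calls need a certificate or token that DummyReq accepts.
import requests

base_url = 'https://pandaserver.example.org:25443/server/panda'
resp = requests.post(
    '{0}/isAlive'.format(base_url),                 # hypothetical method assumed to be in allowedMethods
    data={'dummy': 'parameter'},                    # form fields become **params of the called method
    cert=('/path/to/usercert.pem', '/path/to/userkey.pem'),
    verify=False)
print(resp.status_code, resp.text)
# ---------------------------------------------------------------------------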