Example #1
 def __init__(self, taskBuffer, jobs, logger, params, defaultMap):
     self.jobs = []
     self.jumboJobs = []
     # separate normal and jumbo jobs
     for tmpJob in jobs:
         if EventServiceUtils.isJumboJob(tmpJob):
             self.jumboJobs.append(tmpJob)
         else:
             self.jobs.append(tmpJob)
     self.taskBuffer = taskBuffer
     self.logger = logger
     # set named parameters
     for tmpKey in params:
         tmpVal = params[tmpKey]
         setattr(self, tmpKey, tmpVal)
     # set defaults
     for tmpKey in defaultMap:
         tmpVal = defaultMap[tmpKey]
         if not hasattr(self, tmpKey):
             setattr(self, tmpKey, tmpVal)
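
A minimal usage sketch of the parameter-merging pattern in this constructor: explicit entries in params always win, while defaultMap only fills attributes that were never set. The _Stub class and the attribute names below are hypothetical, purely for illustration, and are not part of the PanDA code above.

 class _Stub:
     # same merge order as the constructor above
     def __init__(self, params, defaultMap):
         for key, val in params.items():      # explicit parameters win
             setattr(self, key, val)
         for key, val in defaultMap.items():  # defaults fill only the gaps
             if not hasattr(self, key):
                 setattr(self, key, val)

 s = _Stub({'attemptNr': 3}, {'attemptNr': 0, 'ignoreTmpError': True})
 print(s.attemptNr, s.ignoreTmpError)  # -> 3 True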
Example #2
 def parseXML(self):
     # get LFN and GUID
     # self.logger.debug('XML filename : %s' % self.xmlFile)
     # return if there are no log/output files
     log_out = [f for f in self.job.Files if f.type in ['log', 'output']]
     if not log_out:
         self.logger.debug("has no outputs")
         self.logger.debug("parseXML end")
         return 0
     # get input files
     inputLFNs = []
     for file in self.job.Files:
         if file.type == 'input':
             inputLFNs.append(file.lfn)
     # parse XML
     lfns = []
     guids = []
     fsizes = []
     md5sums = []
     chksums = []
     surls = []
     fullLfnMap = {}
     nEventsMap = {}
     guidMap = {}
     try:
         # root  = xml.dom.minidom.parse(self.xmlFile)
         root = xml.dom.minidom.parseString(self.data)
         files = root.getElementsByTagName('File')
         for file in files:
             # get GUID
             guid = str(file.getAttribute('ID'))
             # get PFN and LFN nodes
             logical = file.getElementsByTagName('logical')[0]
             lfnNode = logical.getElementsByTagName('lfn')[0]
             # get the LFN as a plain string
             lfn = str(lfnNode.getAttribute('name'))
             # get metadata
             fsize = None
             md5sum = None
             adler32 = None
             surl = None
             fullLFN = None
             for meta in file.getElementsByTagName('metadata'):
                 # get attribute name
                 name = str(meta.getAttribute('att_name'))
                 if name == 'fsize':
                     fsize = int(meta.getAttribute('att_value'))
                 elif name == 'md5sum':
                     md5sum = str(meta.getAttribute('att_value'))
                     # check
                     if re.search("^[a-fA-F0-9]{32}$", md5sum) is None:
                         md5sum = None
                 elif name == 'adler32':
                     adler32 = str(meta.getAttribute('att_value'))
                 elif name == 'surl':
                     surl = str(meta.getAttribute('att_value'))
                 elif name == 'full_lfn':
                     fullLFN = str(meta.getAttribute('att_value'))
             # endpoints
             self.extraInfo['endpoint'][lfn] = []
             for epNode in file.getElementsByTagName('endpoint'):
                 self.extraInfo['endpoint'][lfn].append(
                     str(epNode.firstChild.data))
             # error check
             if (lfn not in inputLFNs) and (fsize is None or
                                            (md5sum is None
                                             and adler32 is None)):
                 if EventServiceUtils.isEventServiceMerge(self.job):
                     continue
                 else:
                     raise RuntimeError('fsize/md5sum/adler32/surl=None')
             # append
             lfns.append(lfn)
             guids.append(guid)
             fsizes.append(fsize)
             md5sums.append(md5sum)
             surls.append(surl)
             if adler32 is not None:
                 # use adler32 if available
                 chksums.append("ad:%s" % adler32)
             else:
                 chksums.append("md5:%s" % md5sum)
             if fullLFN is not None:
                 fullLfnMap[lfn] = fullLFN
     except Exception:
         # parse json
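          # expected JSON shape (hypothetical example):
          #   {"<lfn>": {"guid": "...", "fsize": 123, "adler32": "...",
          #              "surl": "...", "full_lfn": "...", "endpoint": [...]}}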
         try:
             import json
             # with open(self.xmlFile) as tmpF:
             jsonDict = json.loads(self.data)
             for lfn in jsonDict:
                 fileData = jsonDict[lfn]
                 lfn = str(lfn)
                 fsize = None
                 md5sum = None
                 adler32 = None
                 surl = None
                 fullLFN = None
                 guid = str(fileData['guid'])
                 if 'fsize' in fileData:
                     fsize = int(fileData['fsize'])
                 if 'md5sum' in fileData:
                     md5sum = str(fileData['md5sum'])
                     # check
                     if re.search("^[a-fA-F0-9]{32}$", md5sum) is None:
                         md5sum = None
                 if 'adler32' in fileData:
                     adler32 = str(fileData['adler32'])
                 if 'surl' in fileData:
                     surl = str(fileData['surl'])
                 if 'full_lfn' in fileData:
                     fullLFN = str(fileData['full_lfn'])
                 # endpoints
                 self.extraInfo['endpoint'][lfn] = []
                 if 'endpoint' in fileData:
                     self.extraInfo['endpoint'][lfn] = fileData['endpoint']
                 # error check
                 if (lfn not in inputLFNs) and (fsize is None or
                                                (md5sum is None
                                                 and adler32 is None)):
                     if EventServiceUtils.isEventServiceMerge(self.job):
                         continue
                     else:
                         raise RuntimeError(
                             'fsize/md5sum/adler32/surl=None')
                 # append
                 lfns.append(lfn)
                 guids.append(guid)
                 fsizes.append(fsize)
                 md5sums.append(md5sum)
                 surls.append(surl)
                 if adler32 is not None:
                     # use adler32 if available
                     chksums.append("ad:%s" % adler32)
                 else:
                     chksums.append("md5:%s" % md5sum)
                 if fullLFN is not None:
                     fullLfnMap[lfn] = fullLFN
         except Exception:
             # check if file exists
             # if os.path.exists(self.xmlFile):
             if True:
                 errType, errValue, _ = sys.exc_info()
                 self.logger.error(": %s %s" % (errType, errValue))
                 # set failed anyway
                 self.job.jobStatus = 'failed'
                 # XML error happens when pilot got killed due to wall-time limit or failures in wrapper
                 if (self.job.pilotErrorCode in [0,'0','NULL']) and \
                    (self.job.taskBufferErrorCode not in [pandaserver.taskbuffer.ErrorCode.EC_WorkerDone]) and \
                    (self.job.transExitCode  in [0,'0','NULL']):
                     self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
                     self.job.ddmErrorDiag = "Could not get GUID/LFN/MD5/FSIZE/SURL from pilot XML"
                 return 2
             else:
                 # XML was deleted
                 return 1
     # parse metadata to get nEvents
     nEventsFrom = None
     try:
         root = xml.dom.minidom.parseString(self.job.metadata)
         files = root.getElementsByTagName('File')
         for file in files:
             # get GUID
             guid = str(file.getAttribute('ID'))
             # get PFN and LFN nodes
             logical = file.getElementsByTagName('logical')[0]
             lfnNode = logical.getElementsByTagName('lfn')[0]
             # get the LFN as a plain string
             lfn = str(lfnNode.getAttribute('name'))
             guidMap[lfn] = guid
             # get metadata
             nevents = None
             for meta in file.getElementsByTagName('metadata'):
                 # get nEvents
                 name = str(meta.getAttribute('att_name'))
                 if name == 'events':
                     nevents = int(meta.getAttribute('att_value'))
                     nEventsMap[lfn] = nevents
                     break
         nEventsFrom = "xml"
     except Exception:
         pass
     # parse json
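      # expected job report shape (hypothetical example):
      #   {"files": {"output": [{"subFiles": [
      #       {"name": "<lfn>", "nentries": 10, "file_guid": "<guid>"}]}]}}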
     try:
         import json
         jsonDict = json.loads(self.job.metadata)
         for jsonFileItem in jsonDict['files']['output']:
             for jsonSubFileItem in jsonFileItem['subFiles']:
                 lfn = str(jsonSubFileItem['name'])
                 try:
                     nevents = int(jsonSubFileItem['nentries'])
                     nEventsMap[lfn] = nevents
                 except Exception:
                     pass
                 try:
                     guid = str(jsonSubFileItem['file_guid'])
                     guidMap[lfn] = guid
                 except Exception:
                     pass
         nEventsFrom = "json"
     except Exception:
         pass
     # use nEvents and GUIDs reported by the pilot if no job report
     if self.job.metadata == 'NULL' and self.jobStatus == 'finished' and self.job.nEvents > 0 \
             and self.job.prodSourceLabel in ['managed']:
         for file in self.job.Files:
             if file.type == 'output':
                 nEventsMap[file.lfn] = self.job.nEvents
         for lfn, guid in zip(lfns, guids):
             guidMap[lfn] = guid
         nEventsFrom = "pilot"
     self.logger.debug('nEventsMap=%s' % str(nEventsMap))
     self.logger.debug('nEventsFrom=%s' % str(nEventsFrom))
     self.logger.debug('guidMap=%s' % str(guidMap))
     self.logger.debug('self.job.jobStatus=%s in parseXML' %
                       self.job.jobStatus)
     self.logger.debug(
         'isES=%s isJumbo=%s' % (EventServiceUtils.isEventServiceJob(
             self.job), EventServiceUtils.isJumboJob(self.job)))
     # get lumi block number
     lumiBlockNr = self.job.getLumiBlockNr()
     # copy files for variable number of outputs
     tmpStat = self.copyFilesForVariableNumOutputs(lfns)
     if not tmpStat:
         self.logger.error(
             "failed to copy files for variable number of outputs")
         return 2
     # check files
     fileList = []
     for file in self.job.Files:
         fileList.append(file.lfn)
         if file.type == 'input':
             if file.lfn in lfns:
                 if self.job.prodSourceLabel in ['user', 'panda']:
                     # skipped file
                     file.status = 'skipped'
                 elif self.job.prodSourceLabel in [
                         'managed', 'test'
                 ] + JobUtils.list_ptest_prod_sources:
                     # failed by pilot
                     file.status = 'failed'
         elif file.type == 'output' or file.type == 'log':
             # add only log file for failed jobs
             if self.jobStatus == 'failed' and file.type != 'log':
                 file.status = 'failed'
                 continue
             # set failed if it is missing in XML
             if file.lfn not in lfns:
                 if (self.job.jobStatus == 'finished' and EventServiceUtils.isEventServiceJob(self.job)) \
                         or EventServiceUtils.isJumboJob(self.job):
                     # unset file status for ES jobs
                     pass
                 elif file.isAllowedNoOutput():
                     # allowed not to be produced
                     file.status = 'nooutput'
                     self.logger.debug('set {0} to status={1}'.format(
                         file.lfn, file.status))
                 else:
                     file.status = 'failed'
                     self.job.jobStatus = 'failed'
                     self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
                     self.job.ddmErrorDiag = "expected output {0} is missing in pilot XML".format(
                         file.lfn)
                     self.logger.error(self.job.ddmErrorDiag)
                 continue
             # look for GUID with LFN
             try:
                 i = lfns.index(file.lfn)
                 file.GUID = guids[i]
                 file.fsize = fsizes[i]
                 file.md5sum = md5sums[i]
                 file.checksum = chksums[i]
                 surl = surls[i]
                 # status
                 file.status = 'ready'
                 # change to full LFN
                 if file.lfn in fullLfnMap:
                     file.lfn = fullLfnMap[file.lfn]
                 # add SURL to extraInfo
                 self.extraInfo['surl'][file.lfn] = surl
                 # add nevents
                 if file.lfn in nEventsMap:
                     self.extraInfo['nevents'][file.lfn] = nEventsMap[
                         file.lfn]
             except Exception:
                 # status
                 file.status = 'failed'
                 errType, errValue, _ = sys.exc_info()
                 self.logger.error(": %s %s" % (errType, errValue))
             # set lumi block number
             if lumiBlockNr is not None and file.status != 'failed':
                 self.extraInfo['lbnr'][file.lfn] = lumiBlockNr
     self.extraInfo['guid'] = guidMap
     # check consistency between XML and filesTable
     for lfn in lfns:
         if lfn not in fileList:
             self.logger.error("%s is not found in filesTable" % lfn)
             self.job.jobStatus = 'failed'
             for tmpFile in self.job.Files:
                 tmpFile.status = 'failed'
             self.job.ddmErrorCode = pandaserver.dataservice.ErrorCode.EC_Adder
             self.job.ddmErrorDiag = "pilot produced {0} inconsistently with jobdef".format(
                 lfn)
             return 2
     # return
     self.logger.debug("parseXML end")
     return 0
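
For reference, a minimal sketch of the pilot file catalog that the first parsing branch above expects. The element and attribute names (File/ID, logical/lfn, metadata att_name/att_value, endpoint) are taken from the code; the root element name follows the POOL file catalog convention, and the GUID, LFN, and metadata values are invented.

 import xml.dom.minidom

 # hypothetical pilot XML; only the parts parseXML reads are shown
 data = """<POOLFILECATALOG>
   <File ID="deadbeef-0000-1111-2222-333344445555">
     <logical><lfn name="EVNT.01234._000001.pool.root"/></logical>
     <metadata att_name="fsize" att_value="1048576"/>
     <metadata att_name="adler32" att_value="0a1b2c3d"/>
     <metadata att_name="surl" att_value="srm://example.org/file"/>
     <endpoint>EXAMPLE_DATADISK</endpoint>
   </File>
 </POOLFILECATALOG>"""

 root = xml.dom.minidom.parseString(data)
 for f in root.getElementsByTagName('File'):
     lfn = f.getElementsByTagName('logical')[0] \
            .getElementsByTagName('lfn')[0].getAttribute('name')
     print(f.getAttribute('ID'), lfn)  # GUID and LFN, as parseXML extracts them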
Example #3
 def appendJob(self, job, siteMapperCache=None):
     # event service merge
     isEventServiceMerge = EventServiceUtils.isEventServiceMerge(job)
     # PandaID
     self.data['PandaID'] = job.PandaID
     # prodSourceLabel
     self.data['prodSourceLabel'] = job.prodSourceLabel
     # swRelease
     self.data['swRelease'] = job.AtlasRelease
     # homepackage
     self.data['homepackage'] = job.homepackage
     # transformation
     self.data['transformation'] = job.transformation
     # job name
     self.data['jobName'] = job.jobName
     # job definition ID
     self.data['jobDefinitionID'] = job.jobDefinitionID
     # cloud
     self.data['cloud'] = job.cloud
     # files
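      # per-file attributes are collected into comma-separated strings below;
      # variants appended as '%s,' keep a trailing comma that is trimmed
      # later with [:-1]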
     strIFiles = ''
     strOFiles = ''
     strDispatch = ''
     strDisToken = ''
     strDisTokenForOutput = ''
     strDestination = ''
     strRealDataset = ''
     strRealDatasetIn = ''
     strProdDBlock = ''
     strDestToken = ''
     strProdToken = ''
     strProdTokenForOutput = ''
     strGUID = ''
     strFSize = ''
     strCheckSum = ''
     strFileDestinationSE = ''
     strScopeIn = ''
     strScopeOut = ''
     strScopeLog = ''
     logFile = ''
     logGUID = ''
     ddmEndPointIn = []
     ddmEndPointOut = []
     noOutput = []
     siteSpec = None
     inDsLfnMap = {}
     inLFNset = set()
     if siteMapperCache is not None:
         siteMapper = siteMapperCache.getObj()
         siteSpec = siteMapper.getSite(job.computingSite)
         # resolve destSE
         try:
             job.destinationSE = siteMapper.resolveNucleus(
                 job.destinationSE)
             for tmpFile in job.Files:
                 tmpFile.destinationSE = siteMapper.resolveNucleus(
                     tmpFile.destinationSE)
         except Exception:
             pass
         siteMapperCache.releaseObj()
     for file in job.Files:
         if file.type == 'input':
             if EventServiceUtils.isJumboJob(job) and file.lfn in inLFNset:
                 pass
             else:
                 inLFNset.add(file.lfn)
                 if strIFiles != '':
                     strIFiles += ','
                 strIFiles += file.lfn
                 if strDispatch != '':
                     strDispatch += ','
                 strDispatch += file.dispatchDBlock
                 if strDisToken != '':
                     strDisToken += ','
                 strDisToken += file.dispatchDBlockToken
                 strProdDBlock += '%s,' % file.prodDBlock
                 if not isEventServiceMerge:
                     strProdToken += '%s,' % file.prodDBlockToken
                 else:
                     strProdToken += '%s,' % job.metadata[1][file.lfn]
                 if strGUID != '':
                     strGUID += ','
                 strGUID += file.GUID
                 strRealDatasetIn += '%s,' % file.dataset
                 strFSize += '%s,' % file.fsize
                 if file.checksum not in ['', 'NULL', None]:
                     strCheckSum += '%s,' % file.checksum
                 else:
                     strCheckSum += '%s,' % file.md5sum
                 strScopeIn += '%s,' % file.scope
                 ddmEndPointIn.append(
                     self.getDdmEndpoint(siteSpec, file.dispatchDBlockToken,
                                         'input', job.prodSourceLabel,
                                         job.job_label))
                 if file.dataset not in inDsLfnMap:
                     inDsLfnMap[file.dataset] = []
                 inDsLfnMap[file.dataset].append(file.lfn)
         if file.type == 'output' or file.type == 'log':
             if strOFiles != '':
                 strOFiles += ','
             strOFiles += file.lfn
             if strDestination != '':
                 strDestination += ','
             strDestination += file.destinationDBlock
             if strRealDataset != '':
                 strRealDataset += ','
             strRealDataset += file.dataset
             strFileDestinationSE += '%s,' % file.destinationSE
             if file.type == 'log':
                 logFile = file.lfn
                 logGUID = file.GUID
                 strScopeLog = file.scope
             else:
                 strScopeOut += '%s,' % file.scope
             if strDestToken != '':
                 strDestToken += ','
             strDestToken += re.sub(
                 '^ddd:', 'dst:',
                 file.destinationDBlockToken.split(',')[0])
             strDisTokenForOutput += '%s,' % file.dispatchDBlockToken
             strProdTokenForOutput += '%s,' % file.prodDBlockToken
             ddmEndPointOut.append(
                 self.getDdmEndpoint(
                     siteSpec,
                     file.destinationDBlockToken.split(',')[0], 'output',
                     job.prodSourceLabel, job.job_label))
             if file.isAllowedNoOutput():
                 noOutput.append(file.lfn)
     # inFiles
     self.data['inFiles'] = strIFiles
     # dispatch DBlock
     self.data['dispatchDblock'] = strDispatch
     # dispatch DBlock space token
     self.data['dispatchDBlockToken'] = strDisToken
     # dispatch DBlock space token for output
     self.data['dispatchDBlockTokenForOut'] = strDisTokenForOutput[:-1]
     # outFiles
     self.data['outFiles'] = strOFiles
     # destination DBlock
     self.data['destinationDblock'] = strDestination
     # destination DBlock space token
     self.data['destinationDBlockToken'] = strDestToken
     # prod DBlocks
     self.data['prodDBlocks'] = strProdDBlock[:-1]
     # prod DBlock space token
     self.data['prodDBlockToken'] = strProdToken[:-1]
     # real output datasets
     self.data['realDatasets'] = strRealDataset
     # real input datasets
     self.data['realDatasetsIn'] = strRealDatasetIn[:-1]
     # file's destinationSE
     self.data['fileDestinationSE'] = strFileDestinationSE[:-1]
     # log filename
     self.data['logFile'] = logFile
     # log GUID
     self.data['logGUID'] = logGUID
     # jobPars
     self.data['jobPars'], ppSteps = job.extractMultiStepExec()
     if ppSteps is not None:
         self.data.update(ppSteps)
     if job.to_encode_job_params():
         self.data['jobPars'] = base64.b64encode(
             self.data['jobPars'].encode()).decode()
     # attempt number
     self.data['attemptNr'] = job.attemptNr
     # GUIDs
     self.data['GUID'] = strGUID
     # checksum
     self.data['checksum'] = strCheckSum[:-1]
     # fsize
     self.data['fsize'] = strFSize[:-1]
     # scope
     self.data['scopeIn'] = strScopeIn[:-1]
     self.data['scopeOut'] = strScopeOut[:-1]
     self.data['scopeLog'] = strScopeLog
     # DDM endpoints
     try:
         self.data['ddmEndPointIn'] = ','.join(ddmEndPointIn)
     except TypeError:
         self.data['ddmEndPointIn'] = ''
     try:
         self.data['ddmEndPointOut'] = ','.join(ddmEndPointOut)
     except TypeError:
         self.data['ddmEndPointOut'] = ''
     # destinationSE
     self.data['destinationSE'] = job.destinationSE
     # user ID
     self.data['prodUserID'] = job.prodUserID
     # CPU count
     self.data['maxCpuCount'] = job.maxCpuCount
     # RAM count
     self.data['minRamCount'] = job.minRamCount
     # disk count
     self.data['maxDiskCount'] = job.maxDiskCount
     # cmtconfig
     if ppSteps is None:
         self.data['cmtConfig'] = job.cmtConfig
     else:
         self.data['cmtConfig'] = ''
     # processingType
     self.data['processingType'] = job.processingType
     # transferType
     self.data['transferType'] = job.transferType
     # sourceSite
     self.data['sourceSite'] = job.sourceSite
     # current priority
     self.data['currentPriority'] = job.currentPriority
     # taskID
     if job.lockedby == 'jedi':
         self.data['taskID'] = job.jediTaskID
     else:
         self.data['taskID'] = job.taskID
     # core count
     if job.coreCount in ['NULL', None]:
         self.data['coreCount'] = 1
     else:
         self.data['coreCount'] = job.coreCount
     # jobsetID
     self.data['jobsetID'] = job.jobsetID
     # nucleus
     self.data['nucleus'] = job.nucleus
     # walltime
     self.data['maxWalltime'] = job.maxWalltime
     # looping check
     if job.is_no_looping_check():
         self.data['loopingCheck'] = False
     # debug mode
     if job.specialHandling is not None and 'debug' in job.specialHandling:
         self.data['debug'] = 'True'
     # event service or job cloning
     if EventServiceUtils.isJobCloningJob(job):
         self.data['cloneJob'] = EventServiceUtils.getJobCloningType(job)
     elif EventServiceUtils.isEventServiceJob(
             job) or EventServiceUtils.isJumboJob(job):
         self.data['eventService'] = 'True'
         # prod DBlock space token for pre-merging output
         self.data['prodDBlockTokenForOutput'] = strProdTokenForOutput[:-1]
     # event service merge
     if isEventServiceMerge:
         self.data['eventServiceMerge'] = 'True'
         # write to file for ES merge
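          # resulting format (hypothetical names):
          #   inputFor_<out1>:in1,in2^inputFor_<out2>:in3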
         writeToFileStr = ''
         try:
             for outputName in job.metadata[0]:
                 inputList = job.metadata[0][outputName]
                 writeToFileStr += 'inputFor_{0}:'.format(outputName)
                 for tmpInput in inputList:
                     writeToFileStr += '{0},'.format(tmpInput)
                 writeToFileStr = writeToFileStr[:-1]
                 writeToFileStr += '^'
             writeToFileStr = writeToFileStr[:-1]
         except Exception:
             pass
         self.data['writeToFile'] = writeToFileStr
     elif job.writeInputToFile():
         try:
             # write input to file
             writeToFileStr = ''
             for inDS in inDsLfnMap:
                 inputList = inDsLfnMap[inDS]
                 inDS = re.sub('/$', '', inDS)
                 inDS = inDS.split(':')[-1]
                 writeToFileStr += 'tmpin_{0}:'.format(inDS)
                 writeToFileStr += ','.join(inputList)
                 writeToFileStr += '^'
             writeToFileStr = writeToFileStr[:-1]
             self.data['writeToFile'] = writeToFileStr
         except Exception:
             pass
     # replace placeholder
     if EventServiceUtils.isJumboJob(job) or EventServiceUtils.isCoJumboJob(
             job):
         try:
             for inDS in inDsLfnMap:
                 inputList = inDsLfnMap[inDS]
                 inDS = re.sub('/$', '', inDS)
                 inDS = inDS.split(':')[-1]
                 srcStr = 'tmpin__cnt_{0}'.format(inDS)
                 dstStr = ','.join(inputList)
                 self.data['jobPars'] = self.data['jobPars'].replace(
                     srcStr, dstStr)
         except Exception:
             pass
     # no output
     if noOutput != []:
         self.data['allowNoOutput'] = ','.join(noOutput)
     # alternative stage-out
     if job.getAltStgOut() is not None:
         self.data['altStageOut'] = job.getAltStgOut()
     # log to OS
     if job.putLogToOS():
         self.data['putLogToOS'] = 'True'
     # suppress execute string conversion
     if job.noExecStrCnv():
         self.data['noExecStrCnv'] = 'True'
     # in-file positional event number
     if job.inFilePosEvtNum():
         self.data['inFilePosEvtNum'] = 'True'
     # use prefetcher
     if job.usePrefetcher():
         self.data['usePrefetcher'] = 'True'
     # image name
     if job.container_name not in ['NULL', None]:
         self.data['container_name'] = job.container_name
     # IO
     self.data['ioIntensity'] = job.get_task_attribute('ioIntensity')
     self.data['ioIntensityUnit'] = job.get_task_attribute(
         'ioIntensityUnit')
     # HPO
     if job.is_hpo_workflow():
         self.data['isHPO'] = 'True'
     # VP
     if siteSpec is not None:
         scope_input, scope_output = DataServiceUtils.select_scope(
             siteSpec, job.prodSourceLabel, job.job_label)
         if siteSpec.use_vp(scope_input):
             self.data['useVP'] = 'True'