def createJob(self, name, nodes, walltime, command, inputs=None, queuename=None):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s" % commands.getoutput('uuidgen')
    job.VO = self.vo
    job.transformation = self.transformation
    job.destinationDBlock = self.datasetName
    job.destinationSE = self.destName
    job.currentPriority = self.currentPriority
    job.prodSourceLabel = self.prodSourceLabel
    job.computingSite = self.site if queuename is None else queuename
    lqcd_command = {
        "nodes": nodes,
        "walltime": walltime,
        "name": name,
        "command": command
    }
    job.jobParameters = json.dumps(lqcd_command)
    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'
    job.addFile(fileOL)
    job.cmtConfig = inputs
    return job
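# --- Usage sketch (not part of the original file) ----------------------------
# Assumes "submitter" is an instance of the class that defines createJob()
# above, with self.vo, self.transformation, self.datasetName, self.destName,
# self.currentPriority, self.prodSourceLabel and self.site already configured.
# The queue name, node count, walltime and command below are placeholder
# values; submission via userinterface.Client follows the same pattern as the
# other scripts in this collection.
import userinterface.Client as Client

job = submitter.createJob(name='lqcd_test_job',
                          nodes=2,
                          walltime='01:00:00',
                          command='./run_lqcd.sh',
                          queuename='EXAMPLE_QUEUE')
s, o = Client.submitJobs([job])   # same submission call used by the other scripts here
print s, o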
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec
from taskbuffer.DatasetSpec import DatasetSpec
from taskbuffer.DBProxyPool import DBProxyPool

import getpass
passwd = getpass.getpass()

pool = DBProxyPool('adbpro.usatlas.bnl.gov',passwd,2)
proxy = pool.getProxy()

import sys
import commands

job1 = JobSpec()
job1.PandaID = 'NULL'
job1.jobStatus = 'unknown'
job1.computingSite = "aaa"

f11 = FileSpec()
f11.lfn = 'in1.pool.root'
f11.type = 'input'
job1.addFile(f11)

f12 = FileSpec()
f12.lfn = 'out1.pool.root'
f12.type = 'output'
job1.addFile(f12)

job2 = JobSpec()
job2.PandaID = 'NULL'
job2.jobStatus = 'unknown'
def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue):
    # variables for submission
    maxBunchTask = 100
    # make logger
    tmpLog = MsgWrapper(logger)
    tmpLog.debug("start doBrokerage")
    # return for failure
    retFatal = self.SC_FATAL
    retTmpError = self.SC_FAILED
    tmpLog.debug("vo={0} label={1} queue={2}".format(vo, prodSourceLabel, workQueue.queue_name))
    # loop over all tasks
    allRwMap = {}
    prioMap = {}
    tt2Map = {}
    expRWs = {}
    jobSpecList = []
    for tmpJediTaskID, tmpInputList in inputList:
        for taskSpec, cloudName, inputChunk in tmpInputList:
            # make JobSpec to be submitted for TaskAssigner
            jobSpec = JobSpec()
            jobSpec.taskID = taskSpec.jediTaskID
            jobSpec.jediTaskID = taskSpec.jediTaskID
            # set managed to trigger TA
            jobSpec.prodSourceLabel = "managed"
            jobSpec.processingType = taskSpec.processingType
            jobSpec.workingGroup = taskSpec.workingGroup
            jobSpec.metadata = taskSpec.processingType
            jobSpec.assignedPriority = taskSpec.taskPriority
            jobSpec.currentPriority = taskSpec.currentPriority
            jobSpec.maxDiskCount = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024
            if taskSpec.useWorldCloud():
                # use destinationSE to trigger task brokerage in WORLD cloud
                jobSpec.destinationSE = taskSpec.cloud
            prodDBlock = None
            setProdDBlock = False
            for datasetSpec in inputChunk.getDatasets():
                prodDBlock = datasetSpec.datasetName
                if datasetSpec.isMaster():
                    jobSpec.prodDBlock = datasetSpec.datasetName
                    setProdDBlock = True
                for fileSpec in datasetSpec.Files:
                    tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                    jobSpec.addFile(tmpInFileSpec)
            # use secondary dataset name as prodDBlock
            if setProdDBlock == False and prodDBlock != None:
                jobSpec.prodDBlock = prodDBlock
            # append
            jobSpecList.append(jobSpec)
            prioMap[jobSpec.taskID] = jobSpec.currentPriority
            tt2Map[jobSpec.taskID] = jobSpec.processingType
            # get RW for a priority
            if not allRwMap.has_key(jobSpec.currentPriority):
                tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                    vo, prodSourceLabel, workQueue, jobSpec.currentPriority
                )
                if tmpRW == None:
                    tmpLog.error("failed to calculate RW with prio={0}".format(jobSpec.currentPriority))
                    return retTmpError
                allRwMap[jobSpec.currentPriority] = tmpRW
            # get expected RW
            expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
            if expRW == None:
                tmpLog.error("failed to calculate RW for jediTaskID={0}".format(jobSpec.jediTaskID))
                return retTmpError
            expRWs[jobSpec.taskID] = expRW
    # get fullRWs
    fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo, prodSourceLabel, None, None)
    if fullRWs == None:
        tmpLog.error("failed to calculate full RW")
        return retTmpError
    # set metadata
    for jobSpec in jobSpecList:
        rwValues = allRwMap[jobSpec.currentPriority]
        jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
            jobSpec.metadata,
            str(rwValues),
            str(expRWs),
            str(prioMap),
            str(fullRWs),
            str(tt2Map),
        )
    tmpLog.debug("run task assigner for {0} tasks".format(len(jobSpecList)))
    nBunchTask = 0
    while nBunchTask < len(jobSpecList):
        # get a bunch
        jobsBunch = jobSpecList[nBunchTask : nBunchTask + maxBunchTask]
        strIDs = "jediTaskID="
        for tmpJobSpec in jobsBunch:
            strIDs += "{0},".format(tmpJobSpec.taskID)
        strIDs = strIDs[:-1]
        tmpLog.debug(strIDs)
        # increment index
        nBunchTask += maxBunchTask
        # run task brokerage
        stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
        tmpLog.debug("{0}:{1}".format(stS, str(outSs)))
    # return
    tmpLog.debug("done")
    return self.SC_SUCCEEDED
def doBrokerage(self,inputList,vo,prodSourceLabel,workQueue):
    # list with a lock
    inputListWorld = ListWithLock([])
    # variables for submission
    maxBunchTask = 100
    # make logger
    tmpLog = MsgWrapper(logger)
    tmpLog.debug('start doBrokerage')
    # return for failure
    retFatal = self.SC_FATAL
    retTmpError = self.SC_FAILED
    tmpLog.debug('vo={0} label={1} queue={2} nTasks={3}'.format(vo,prodSourceLabel,
                                                                workQueue.queue_name,
                                                                len(inputList)))
    # loop over all tasks
    allRwMap = {}
    prioMap = {}
    tt2Map = {}
    expRWs = {}
    jobSpecList = []
    for tmpJediTaskID,tmpInputList in inputList:
        for taskSpec,cloudName,inputChunk in tmpInputList:
            # collect tasks for WORLD
            if taskSpec.useWorldCloud():
                inputListWorld.append((taskSpec,inputChunk))
                continue
            # make JobSpec to be submitted for TaskAssigner
            jobSpec = JobSpec()
            jobSpec.taskID = taskSpec.jediTaskID
            jobSpec.jediTaskID = taskSpec.jediTaskID
            # set managed to trigger TA
            jobSpec.prodSourceLabel = 'managed'
            jobSpec.processingType = taskSpec.processingType
            jobSpec.workingGroup = taskSpec.workingGroup
            jobSpec.metadata = taskSpec.processingType
            jobSpec.assignedPriority = taskSpec.taskPriority
            jobSpec.currentPriority = taskSpec.currentPriority
            jobSpec.maxDiskCount = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024
            if taskSpec.useWorldCloud():
                # use destinationSE to trigger task brokerage in WORLD cloud
                jobSpec.destinationSE = taskSpec.cloud
            prodDBlock = None
            setProdDBlock = False
            for datasetSpec in inputChunk.getDatasets():
                prodDBlock = datasetSpec.datasetName
                if datasetSpec.isMaster():
                    jobSpec.prodDBlock = datasetSpec.datasetName
                    setProdDBlock = True
                for fileSpec in datasetSpec.Files:
                    tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                    jobSpec.addFile(tmpInFileSpec)
            # use secondary dataset name as prodDBlock
            if setProdDBlock == False and prodDBlock != None:
                jobSpec.prodDBlock = prodDBlock
            # append
            jobSpecList.append(jobSpec)
            prioMap[jobSpec.taskID] = jobSpec.currentPriority
            tt2Map[jobSpec.taskID] = jobSpec.processingType
            # get RW for a priority
            if not allRwMap.has_key(jobSpec.currentPriority):
                tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                   jobSpec.currentPriority)
                if tmpRW == None:
                    tmpLog.error('failed to calculate RW with prio={0}'.format(jobSpec.currentPriority))
                    return retTmpError
                allRwMap[jobSpec.currentPriority] = tmpRW
            # get expected RW
            expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
            if expRW == None:
                tmpLog.error('failed to calculate RW for jediTaskID={0}'.format(jobSpec.jediTaskID))
                return retTmpError
            expRWs[jobSpec.taskID] = expRW
    # for old clouds
    if jobSpecList != []:
        # get fullRWs
        fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
        if fullRWs == None:
            tmpLog.error('failed to calculate full RW')
            return retTmpError
        # set metadata
        for jobSpec in jobSpecList:
            rwValues = allRwMap[jobSpec.currentPriority]
            jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (jobSpec.metadata,
                                                      str(rwValues),str(expRWs),
                                                      str(prioMap),str(fullRWs),
                                                      str(tt2Map))
        tmpLog.debug('run task assigner for {0} tasks'.format(len(jobSpecList)))
        nBunchTask = 0
        while nBunchTask < len(jobSpecList):
            # get a bunch
            jobsBunch = jobSpecList[nBunchTask:nBunchTask+maxBunchTask]
            strIDs = 'jediTaskID='
            for tmpJobSpec in jobsBunch:
                strIDs += '{0},'.format(tmpJobSpec.taskID)
            strIDs = strIDs[:-1]
            tmpLog.debug(strIDs)
            # increment index
            nBunchTask += maxBunchTask
            # run task brokerage
            stS,outSs = PandaClient.runTaskAssignment(jobsBunch)
            tmpLog.debug('{0}:{1}'.format(stS,str(outSs)))
    # for WORLD
    if len(inputListWorld) > 0:
        # thread pool
        threadPool = ThreadPool()
        # get full RW for WORLD
        fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
        if fullRWs == None:
            tmpLog.error('failed to calculate full WORLD RW')
            return retTmpError
        # get RW per priority
        for taskSpec,inputChunk in inputListWorld:
            if not taskSpec.currentPriority in allRwMap:
                tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                        taskSpec.currentPriority)
                if tmpRW == None:
                    tmpLog.error('failed to calculate RW with prio={0}'.format(taskSpec.currentPriority))
                    return retTmpError
                allRwMap[taskSpec.currentPriority] = tmpRW
        # live counter for RWs
        liveCounter = MapWithLock(allRwMap)
        # make workers
        ddmIF = self.ddmIF.getInterface(vo)
        for iWorker in range(4):
            thr = AtlasProdTaskBrokerThread(inputListWorld,threadPool,
                                            self.taskBufferIF,ddmIF,
                                            fullRWs,liveCounter)
            thr.start()
        threadPool.join(60*10)
    # return
    tmpLog.debug('doBrokerage done')
    return self.SC_SUCCEEDED
if format == 'HITS':
    step = 'simul'
# append
oDatasets.append('%s.%s.%s.%s_tid%06d' % (m.group(1), step, format, m.group(3), int(taskID)))
# log dataset
lDataset = '%s.%s.%s.%s_tid%06d' % (m.group(1), m.group(2), 'log', m.group(3), int(taskID))

# instantiate JobSpecs
iJob = 0
jobList = []
for line in taskFile:
    iJob += 1
    job = JobSpec()
    # job ID ###### FIXME
    job.jobDefinitionID = int(time.time()) % 10000
    # job name
    job.jobName = "%s_%05d.job" % (taskName, iJob)
    # AtlasRelease
    if len(re.findall('\.', trfVer)) > 2:
        match = re.search('^(\d+\.\d+\.\d+)', trfVer)
        job.AtlasRelease = 'Atlas-%s' % match.group(1)
    else:
        job.AtlasRelease = 'Atlas-%s' % trfVer
    # homepackage
    vers = trfVer.split('.')
    if int(vers[0]) <= 11:
        job.homepackage = 'JobTransforms'
        for ver in vers:
import sys
import time
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'BNL_ATLAS_2'

jobList = []
for i in range(20):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = commands.getoutput('uuidgen')
    job.AtlasRelease = 'Atlas-11.0.41'
    #job.AtlasRelease = 'Atlas-11.0.3'
    job.homepackage = 'AnalysisTransforms'
    job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 100
    job.prodSourceLabel = 'user'
    job.computingSite = site
    #job.prodDBlock = "pandatest.b1599dfa-cd36-4fc5-92f6-495781a94c66"
    job.prodDBlock = "pandatest.f228b051-077b-4f81-90bf-496340644379"

    fileI = FileSpec()
import sys
import time
import commands

from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

aSrvID = None
for idx, argv in enumerate(sys.argv):
    if argv == '-s':
        aSrvID = sys.argv[idx + 1]
        sys.argv = sys.argv[:idx]
        break

site = sys.argv[1]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'local'

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = "%s" % commands.getoutput('uuidgen')
# MPI transform on Titan that will run actual job
job.transformation = '/lustre/atlas/proj-shared/csc108/panitkin/alicetest1/mpi_wrapper_alice_ppbench.py'
job.destinationDBlock = datasetName
job.destinationSE = destName
job.currentPriority = 1000
job.prodSourceLabel = 'panda'
job.computingSite = site
job.jobParameters = " "
job.VO = 'alice'

fileOL = FileSpec()
if argv == '-s':
    aSrvID = sys.argv[idx+1]
    sys.argv = sys.argv[:idx]
    break

#site = sys.argv[1]
site = 'ANALY_BNL-LSST' #orig
#site = 'BNL-LSST'
#site = 'SWT2_CPB-LSST'
#site = 'UTA_SWT2-LSST'
#site = 'ANALY_SWT2_CPB-LSST'

datasetName = 'panda.user.jschovan.lsst.%s' % commands.getoutput('uuidgen')
destName = None

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = "%s" % commands.getoutput('uuidgen')
### job.transformation = 'http://www.usatlas.bnl.gov/~wenaus/lsst-trf/lsst-trf.sh'
#job.transformation = 'http://pandawms.org/pandawms-jobcache/lsst-trf.sh'
job.transformation = 'http://pandawms.org/pandawms-jobcache/lsst-trf-phosim332.sh'
job.destinationDBlock = datasetName
#job.destinationSE = destName
job.destinationSE = 'local'
job.currentPriority = 1000
#job.prodSourceLabel = 'ptest'
#job.prodSourceLabel = 'panda'
#job.prodSourceLabel = 'ptest'
#job.prodSourceLabel = 'test'
#job.prodSourceLabel = 'ptest'
### 2014-01-27
import sys
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

jobList = []
for i in range(2):
    datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
    destName = 'ANALY_BNL_ATLAS_1'

    job = JobSpec()
    job.jobDefinitionID = 1
    job.jobName = commands.getoutput('uuidgen')
    job.AtlasRelease = 'Atlas-12.0.2'
    job.homepackage = 'AnalysisTransforms'
    job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena2'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 3000
    job.prodSourceLabel = 'user'
    job.computingSite = site
    job.prodDBlock = 'testIdeal_06.005001.pythia_minbias.recon.AOD.v12000103'

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % commands.getoutput('uuidgen')
    fileOL.destinationDBlock = job.destinationDBlock
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = None

files = {
    'daq.ATLAS.0092045.physics.RPCwBeam.LB0016.SFO-2._0009.data': None,
}

jobList = []
index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease = 'Atlas-14.4.0'
    job.homepackage = 'AtlasTier0/14.4.0.2'
    job.transformation = 'Reco_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.computingSite = site
    job.prodDBlock = 'data08_cos.00092045.physics_RPCwBeam.daq.RAW.o4_T1224560091'
    job.prodSourceLabel = 'test'
    job.processingType = 'reprocessing'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc4-gcc34-opt'
def run(self):
    try:
        _logger.debug('%s Start %s' % (self.pandaID,self.job.jobStatus))
        flagComplete = True
        ddmJobs = []
        topUserDsList = []
        usingMerger = False
        disableNotifier = False
        firstIndvDS = True
        finalStatusDS = []
        for destinationDBlock in self.destinationDBlocks:
            dsList = []
            _logger.debug('%s start %s' % (self.pandaID,destinationDBlock))
            # ignore tid datasets
            if re.search('_tid[\d_]+$',destinationDBlock):
                _logger.debug('%s skip %s' % (self.pandaID,destinationDBlock))
                continue
            # ignore HC datasets
            if re.search('^hc_test\.',destinationDBlock) != None or re.search('^user\.gangarbt\.',destinationDBlock) != None:
                if re.search('_sub\d+$',destinationDBlock) == None and re.search('\.lib$',destinationDBlock) == None:
                    _logger.debug('%s skip HC %s' % (self.pandaID,destinationDBlock))
                    continue
            # query dataset
            if self.datasetMap.has_key(destinationDBlock):
                dataset = self.datasetMap[destinationDBlock]
            else:
                dataset = self.taskBuffer.queryDatasetWithMap({'name':destinationDBlock})
            if dataset == None:
                _logger.error('%s Not found : %s' % (self.pandaID,destinationDBlock))
                flagComplete = False
                continue
            # skip tobedeleted/tobeclosed
            if dataset.status in ['cleanup','tobeclosed','completed']:
                _logger.debug('%s skip %s due to %s' % (self.pandaID,destinationDBlock,dataset.status))
                continue
            dsList.append(dataset)
            # sort
            dsList.sort()
            # count number of completed files
            notFinish = self.taskBuffer.countFilesWithMap({'destinationDBlock':destinationDBlock,
                                                           'status':'unknown'})
            if notFinish < 0:
                _logger.error('%s Invalid DB return : %s' % (self.pandaID,notFinish))
                flagComplete = False
                continue
            # check if completed
            _logger.debug('%s notFinish:%s' % (self.pandaID,notFinish))
            if self.job.destinationSE == 'local' and self.job.prodSourceLabel in ['user','panda']:
                # close non-DQ2 destinationDBlock immediately
                finalStatus = 'closed'
            elif self.job.lockedby == 'jedi' and self.isTopLevelDS(destinationDBlock):
                # set it closed in order not to trigger DDM cleanup. It will be closed by JEDI
                finalStatus = 'closed'
            elif self.job.prodSourceLabel in ['user'] and "--mergeOutput" in self.job.jobParameters \
                 and self.job.processingType != 'usermerge':
                # merge output files
                if firstIndvDS:
                    # set 'tobemerged' to only the first dataset to avoid triggering many Mergers for --individualOutDS
                    finalStatus = 'tobemerged'
                    firstIndvDS = False
                else:
                    finalStatus = 'tobeclosed'
                # set merging to top dataset
                usingMerger = True
                # disable Notifier
                disableNotifier = True
            elif self.job.produceUnMerge():
                finalStatus = 'doing'
            else:
                # set status to 'tobeclosed' to trigger DQ2 closing
                finalStatus = 'tobeclosed'
            if notFinish == 0:
                _logger.debug('%s set %s to dataset : %s' % (self.pandaID,finalStatus,destinationDBlock))
                # set status
                dataset.status = finalStatus
                # update dataset in DB
                retT = self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                                      criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                if len(retT) > 0 and retT[0] == 1:
                    finalStatusDS += dsList
                    # close user datasets
                    if self.job.prodSourceLabel in ['user'] and self.job.destinationDBlock.endswith('/') \
                       and (dataset.name.startswith('user') or dataset.name.startswith('group')):
                        # get top-level user dataset
                        topUserDsName = re.sub('_sub\d+$','',dataset.name)
                        # update if it is the first attempt
                        if topUserDsName != dataset.name and not topUserDsName in topUserDsList and self.job.lockedby != 'jedi':
                            topUserDs = self.taskBuffer.queryDatasetWithMap({'name':topUserDsName})
                            if topUserDs != None:
                                # check status
                                if topUserDs.status in ['completed','cleanup','tobeclosed',
                                                        'tobemerged','merging']:
                                    _logger.debug('%s skip %s due to status=%s' % (self.pandaID,topUserDsName,topUserDs.status))
                                else:
                                    # set status
                                    if self.job.processingType.startswith('gangarobot') or \
                                       self.job.processingType.startswith('hammercloud'):
                                        # not trigger freezing for HC datasets so that files can be appended
                                        topUserDs.status = 'completed'
                                    elif not usingMerger:
                                        topUserDs.status = finalStatus
                                    else:
                                        topUserDs.status = 'merging'
                                    # append to avoid repetition
                                    topUserDsList.append(topUserDsName)
                                    # update DB
                                    retTopT = self.taskBuffer.updateDatasets([topUserDs],withLock=True,withCriteria="status<>:crStatus",
                                                                             criteriaMap={':crStatus':topUserDs.status})
                                    if len(retTopT) > 0 and retTopT[0] == 1:
                                        _logger.debug('%s set %s to top dataset : %s' % (self.pandaID,topUserDs.status,topUserDsName))
                                    else:
                                        _logger.debug('%s failed to update top dataset : %s' % (self.pandaID,topUserDsName))
                    # get parent dataset for merge job
                    if self.job.processingType == 'usermerge':
                        tmpMatch = re.search('--parentDS ([^ \'\"]+)',self.job.jobParameters)
                        if tmpMatch == None:
                            _logger.error('%s failed to extract parentDS' % self.pandaID)
                        else:
                            unmergedDsName = tmpMatch.group(1)
                            # update if it is the first attempt
                            if not unmergedDsName in topUserDsList:
                                unmergedDs = self.taskBuffer.queryDatasetWithMap({'name':unmergedDsName})
                                if unmergedDs == None:
                                    _logger.error('%s failed to get parentDS=%s from DB' % (self.pandaID,unmergedDsName))
                                else:
                                    # check status
                                    if unmergedDs.status in ['completed','cleanup','tobeclosed']:
                                        _logger.debug('%s skip %s due to status=%s' % (self.pandaID,unmergedDsName,unmergedDs.status))
                                    else:
                                        # set status
                                        unmergedDs.status = finalStatus
                                        # append to avoid repetition
                                        topUserDsList.append(unmergedDsName)
                                        # update DB
                                        retTopT = self.taskBuffer.updateDatasets([unmergedDs],withLock=True,withCriteria="status<>:crStatus",
                                                                                 criteriaMap={':crStatus':unmergedDs.status})
                                        if len(retTopT) > 0 and retTopT[0] == 1:
                                            _logger.debug('%s set %s to parent dataset : %s' % (self.pandaID,unmergedDs.status,unmergedDsName))
                                        else:
                                            _logger.debug('%s failed to update parent dataset : %s' % (self.pandaID,unmergedDsName))
                if self.pandaDDM and self.job.prodSourceLabel == 'managed':
                    # instantiate SiteMapper
                    if self.siteMapper == None:
                        self.siteMapper = SiteMapper(self.taskBuffer)
                    # get file list for PandaDDM
                    retList = self.taskBuffer.queryFilesWithMap({'destinationDBlock':destinationDBlock})
                    lfnsStr = ''
                    guidStr = ''
                    for tmpFile in retList:
                        if tmpFile.type in ['log','output']:
                            lfnsStr += '%s,' % tmpFile.lfn
                            guidStr += '%s,' % tmpFile.GUID
                    if lfnsStr != '':
                        guidStr = guidStr[:-1]
                        lfnsStr = lfnsStr[:-1]
                        # create a DDM job
                        ddmjob = JobSpec()
                        ddmjob.jobDefinitionID = int(time.time()) % 10000
                        ddmjob.jobName = "%s" % commands.getoutput('uuidgen')
                        ddmjob.transformation = 'http://pandaserver.cern.ch:25080/trf/mover/run_dq2_cr'
                        ddmjob.destinationDBlock = 'testpanda.%s' % ddmjob.jobName
                        ddmjob.computingSite = "BNL_ATLAS_DDM"
                        ddmjob.destinationSE = ddmjob.computingSite
                        ddmjob.currentPriority = 200000
                        ddmjob.prodSourceLabel = 'ddm'
                        ddmjob.transferType = 'sub'
                        # append log file
                        fileOL = FileSpec()
                        fileOL.lfn = "%s.job.log.tgz" % ddmjob.jobName
                        fileOL.destinationDBlock = ddmjob.destinationDBlock
                        fileOL.destinationSE = ddmjob.destinationSE
                        fileOL.dataset = ddmjob.destinationDBlock
                        fileOL.type = 'log'
                        ddmjob.addFile(fileOL)
                        # make arguments
                        dstDQ2ID = 'BNLPANDA'
                        srcDQ2ID = self.siteMapper.getSite(self.job.computingSite).ddm
                        callBackURL = 'https://%s:%s/server/panda/datasetCompleted?vuid=%s&site=%s' % \
                                      (panda_config.pserverhost,panda_config.pserverport,
                                       dataset.vuid,dstDQ2ID)
                        _logger.debug(callBackURL)
                        # set src/dest
                        ddmjob.sourceSite = srcDQ2ID
                        ddmjob.destinationSite = dstDQ2ID
                        # if src==dst, send callback without ddm job
                        if dstDQ2ID == srcDQ2ID:
                            comout = commands.getoutput('curl -k %s' % callBackURL)
                            _logger.debug(comout)
                        else:
                            # run dq2_cr
                            callBackURL = urllib.quote(callBackURL)
                            # get destination dir
                            destDir = brokerage.broker_util._getDefaultStorage(self.siteMapper.getSite(self.job.computingSite).dq2url)
                            argStr = "-s %s -r %s --guids %s --lfns %s --callBack %s -d %s/%s %s" % \
                                     (srcDQ2ID,dstDQ2ID,guidStr,lfnsStr,callBackURL,destDir,
                                      destinationDBlock,destinationDBlock)
                            # set job parameters
                            ddmjob.jobParameters = argStr
                            _logger.debug('%s pdq2_cr %s' % (self.pandaID,ddmjob.jobParameters))
                            ddmJobs.append(ddmjob)
                # start Activator
                if re.search('_sub\d+$',dataset.name) == None:
                    if self.job.prodSourceLabel == 'panda' and self.job.processingType in ['merge','unmerge']:
                        # don't trigger Activator for merge jobs
                        pass
                    else:
                        if self.job.jobStatus == 'finished':
                            aThr = Activator(self.taskBuffer,dataset)
                            aThr.start()
                            aThr.join()
                        else:
                            # unset flag since another thread already updated
                            #flagComplete = False
                            pass
            else:
                # update dataset in DB
                self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                               criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                # unset flag
                flagComplete = False
            # end
            _logger.debug('%s end %s' % (self.pandaID,destinationDBlock))
        # start DDM jobs
        if ddmJobs != []:
            self.taskBuffer.storeJobs(ddmJobs,self.job.prodUserID,joinThr=True)
        # change pending jobs to failed
        finalizedFlag = True
        if flagComplete and self.job.prodSourceLabel == 'user':
            _logger.debug('%s finalize %s %s' % (self.pandaID,self.job.prodUserName,self.job.jobDefinitionID))
            finalizedFlag = self.taskBuffer.finalizePendingJobs(self.job.prodUserName,self.job.jobDefinitionID,waitLock=True)
            _logger.debug('%s finalized with %s' % (self.pandaID,finalizedFlag))
        # update unmerged datasets in JEDI to trigger merging
        if flagComplete and self.job.produceUnMerge() and finalStatusDS != []:
            if finalizedFlag:
                self.taskBuffer.updateUnmergedDatasets(self.job,finalStatusDS)
        # start notifier
        _logger.debug('%s source:%s complete:%s' % (self.pandaID,self.job.prodSourceLabel,flagComplete))
        if (self.job.jobStatus != 'transferring') and ((flagComplete and self.job.prodSourceLabel == 'user') or \
           (self.job.jobStatus == 'failed' and self.job.prodSourceLabel == 'panda')) and \
           self.job.lockedby != 'jedi':
            # don't send email for merge jobs
            if (not disableNotifier) and not self.job.processingType in ['merge','unmerge']:
                useNotifier = True
                summaryInfo = {}
                # check all jobDefIDs in jobsetID
                if not self.job.jobsetID in [0,None,'NULL']:
                    useNotifier,summaryInfo = self.taskBuffer.checkDatasetStatusForNotifier(self.job.jobsetID,self.job.jobDefinitionID,
                                                                                            self.job.prodUserName)
                    _logger.debug('%s useNotifier:%s' % (self.pandaID,useNotifier))
                if useNotifier:
                    _logger.debug('%s start Notifier' % self.pandaID)
                    nThr = Notifier.Notifier(self.taskBuffer,self.job,self.destinationDBlocks,summaryInfo)
                    nThr.run()
                    _logger.debug('%s end Notifier' % self.pandaID)
        _logger.debug('%s End' % self.pandaID)
    except:
        errType,errValue = sys.exc_info()[:2]
        _logger.error("%s %s" % (errType,errValue))
import sys
import time
import random
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

index = 0
job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = "%s_%d" % (commands.getoutput('uuidgen'),index)
job.AtlasRelease = 'Atlas-14.1.0\nAtlas-14.1.0'
job.homepackage = 'AtlasProduction/14.1.0.3\nAtlasProduction/14.1.0.3'
job.transformation = 'csc_digi_trf.py\ncsc_reco_trf.py'
job.destinationDBlock = datasetName
job.computingSite = site
job.prodDBlock = 'valid1.005200.T1_McAtNlo_Jimmy.simul.HITS.e322_s429_tid022081'
job.prodSourceLabel = 'test'
job.currentPriority = 10000
job.cloud = 'US'
cloud = sys.argv[2]
prodDBlock = sys.argv[3]
inputFile = sys.argv[4]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile: None,
}

jobList = []
index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease = 'Atlas-15.3.1'
    job.homepackage = 'AtlasProduction/15.3.1.5'
    job.transformation = 'csc_atlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite = site
    job.prodDBlock = prodDBlock
    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc4-gcc34-opt'
def run(self):
    try:
        self.putLog('start %s' % self.evpFileName)
        # lock evp file
        self.evpFile = open(self.evpFileName)
        try:
            fcntl.flock(self.evpFile.fileno(),fcntl.LOCK_EX|fcntl.LOCK_NB)
        except:
            # release
            self.putLog("cannot lock %s" % self.evpFileName)
            self.evpFile.close()
            return True
        # options
        runEvtList = []
        eventPickDataType = ''
        eventPickStreamName = ''
        eventPickDS = []
        eventPickAmiTag = ''
        eventPickNumSites = 1
        inputFileList = []
        tagDsList = []
        tagQuery = ''
        tagStreamRef = ''
        skipDaTRI = False
        runEvtGuidMap = {}
        ei_api = ''
        # read evp file
        for tmpLine in self.evpFile:
            tmpMatch = re.search('^([^=]+)=(.+)$',tmpLine)
            # check format
            if tmpMatch == None:
                continue
            tmpItems = tmpMatch.groups()
            if tmpItems[0] == 'runEvent':
                # get run and event number
                tmpRunEvt = tmpItems[1].split(',')
                if len(tmpRunEvt) == 2:
                    runEvtList.append(tmpRunEvt)
            elif tmpItems[0] == 'eventPickDataType':
                # data type
                eventPickDataType = tmpItems[1]
            elif tmpItems[0] == 'eventPickStreamName':
                # stream name
                eventPickStreamName = tmpItems[1]
            elif tmpItems[0] == 'eventPickDS':
                # dataset pattern
                eventPickDS = tmpItems[1].split(',')
            elif tmpItems[0] == 'eventPickAmiTag':
                # AMI tag
                eventPickAmiTag = tmpItems[1]
            elif tmpItems[0] == 'eventPickNumSites':
                # the number of sites where datasets are distributed
                try:
                    eventPickNumSites = int(tmpItems[1])
                except:
                    pass
            elif tmpItems[0] == 'userName':
                # user name
                self.userDN = tmpItems[1]
                self.putLog("user=%s" % self.userDN)
            elif tmpItems[0] == 'userTaskName':
                # user task name
                self.userTaskName = tmpItems[1]
            elif tmpItems[0] == 'userDatasetName':
                # user dataset name
                self.userDatasetName = tmpItems[1]
            elif tmpItems[0] == 'lockedBy':
                # client name
                self.lockedBy = tmpItems[1]
            elif tmpItems[0] == 'creationTime':
                # creation time
                self.creationTime = tmpItems[1]
            elif tmpItems[0] == 'params':
                # parameters
                self.params = tmpItems[1]
            elif tmpItems[0] == 'ei_api':
                # ei api parameter for MC
                ei_api = tmpItems[1]
            elif tmpItems[0] == 'inputFileList':
                # input file list
                inputFileList = tmpItems[1].split(',')
                try:
                    inputFileList.remove('')
                except:
                    pass
            elif tmpItems[0] == 'tagDS':
                # TAG dataset
                tagDsList = tmpItems[1].split(',')
            elif tmpItems[0] == 'tagQuery':
                # query for TAG
                tagQuery = tmpItems[1]
            elif tmpItems[0] == 'tagStreamRef':
                # StreamRef for TAG
                tagStreamRef = tmpItems[1]
                if not tagStreamRef.endswith('_ref'):
                    tagStreamRef += '_ref'
            elif tmpItems[0] == 'runEvtGuidMap':
                # GUIDs
                try:
                    exec "runEvtGuidMap="+tmpItems[1]
                except:
                    pass
        # extract task name
        if self.userTaskName == '' and self.params != '':
            try:
                tmpMatch = re.search('--outDS(=| ) *([^ ]+)',self.params)
                if tmpMatch != None:
                    self.userTaskName = tmpMatch.group(2)
                    if not self.userTaskName.endswith('/'):
                        self.userTaskName += '/'
            except:
                pass
        # suppress DaTRI
        if self.params != '':
            if '--eventPickSkipDaTRI' in self.params:
                skipDaTRI = True
        # get compact user name
        compactDN = self.taskBuffer.cleanUserID(self.userDN)
        # get jediTaskID
        self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI(compactDN,self.userTaskName)
        # convert
        if tagDsList == [] or tagQuery == '':
            # convert run/event list to dataset/file list
            tmpRet,locationMap,allFiles = self.pd2p.convertEvtRunToDatasets(runEvtList,
                                                                            eventPickDataType,
                                                                            eventPickStreamName,
                                                                            eventPickDS,
                                                                            eventPickAmiTag,
                                                                            self.userDN,
                                                                            runEvtGuidMap,
                                                                            ei_api)
            if not tmpRet:
                if 'isFatal' in locationMap and locationMap['isFatal'] == True:
                    self.ignoreError = False
                self.endWithError('Failed to convert the run/event list to a dataset/file list')
                return False
        else:
            # get parent dataset/files with TAG
            tmpRet,locationMap,allFiles = self.pd2p.getTagParentInfoUsingTagQuery(tagDsList,tagQuery,tagStreamRef)
            if not tmpRet:
                self.endWithError('Failed to get parent dataset/file list with TAG')
                return False
        # use only files in the list
        if inputFileList != []:
            tmpAllFiles = []
            for tmpFile in allFiles:
                if tmpFile['lfn'] in inputFileList:
                    tmpAllFiles.append(tmpFile)
            allFiles = tmpAllFiles
        # remove redundant CN from DN
        tmpDN = self.userDN
        tmpDN = re.sub('/CN=limited proxy','',tmpDN)
        tmpDN = re.sub('(/CN=proxy)+$','',tmpDN)
        # make dataset container
        tmpRet = self.pd2p.registerDatasetContainerWithDatasets(self.userDatasetName,allFiles,
                                                                locationMap,
                                                                nSites=eventPickNumSites,
                                                                owner=tmpDN)
        if not tmpRet:
            self.endWithError('Failed to make a dataset container %s' % self.userDatasetName)
            return False
        # skip DaTRI
        if skipDaTRI:
            # successfully terminated
            self.putLog("skip DaTRI")
            # update task
            self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID)
        else:
            # get candidates
            tmpRet,candidateMaps = self.pd2p.getCandidates(self.userDatasetName,checkUsedFile=False,
                                                           useHidden=True)
            if not tmpRet:
                self.endWithError('Failed to find candidate for destination')
                return False
            # collect all candidates
            allCandidates = []
            for tmpDS,tmpDsVal in candidateMaps.iteritems():
                for tmpCloud,tmpCloudVal in tmpDsVal.iteritems():
                    for tmpSiteName in tmpCloudVal[0]:
                        if not tmpSiteName in allCandidates:
                            allCandidates.append(tmpSiteName)
            if allCandidates == []:
                self.endWithError('No candidate for destination')
                return False
            # get list of dataset (container) names
            if eventPickNumSites > 1:
                # decompose container to transfer datasets separately
                tmpRet,tmpOut = self.pd2p.getListDatasetReplicasInContainer(self.userDatasetName)
                if not tmpRet:
                    self.endWithError('Failed to get the size of %s' % self.userDatasetName)
                    return False
                userDatasetNameList = tmpOut.keys()
            else:
                # transfer container at once
                userDatasetNameList = [self.userDatasetName]
            # loop over all datasets
            sitesUsed = []
            for tmpUserDatasetName in userDatasetNameList:
                # get size of dataset container
                tmpRet,totalInputSize = rucioAPI.getDatasetSize(tmpUserDatasetName)
                if not tmpRet:
                    self.endWithError('Failed to get the size of %s' % tmpUserDatasetName)
                    return False
                # run brokerage
                tmpJob = JobSpec()
                tmpJob.AtlasRelease = ''
                self.putLog("run brokerage for %s" % tmpDS)
                brokerage.broker.schedule([tmpJob],self.taskBuffer,self.siteMapper,True,allCandidates,
                                          True,datasetSize=totalInputSize)
                if tmpJob.computingSite.startswith('ERROR'):
                    self.endWithError('brokerage failed with %s' % tmpJob.computingSite)
                    return False
                self.putLog("site -> %s" % tmpJob.computingSite)
                # send transfer request
                try:
                    tmpDN = rucioAPI.parse_dn(tmpDN)
                    tmpStatus,userInfo = rucioAPI.finger(tmpDN)
                    if not tmpStatus:
                        raise RuntimeError,'user info not found for {0} with {1}'.format(tmpDN,userInfo)
                    tmpDN = userInfo['nickname']
                    tmpDQ2ID = self.siteMapper.getSite(tmpJob.computingSite).ddm_input
                    tmpMsg = "%s ds=%s site=%s id=%s" % ('registerDatasetLocation for DaTRI ',
                                                         tmpUserDatasetName,
                                                         tmpDQ2ID,
                                                         tmpDN)
                    self.putLog(tmpMsg)
                    rucioAPI.registerDatasetLocation(tmpDS,[tmpDQ2ID],lifetime=14,owner=tmpDN,
                                                     activity="User Subscriptions")
                    self.putLog('OK')
                except:
                    errType,errValue = sys.exc_info()[:2]
                    tmpStr = 'Failed to send transfer request : %s %s' % (errType,errValue)
                    tmpStr.strip()
                    tmpStr += traceback.format_exc()
                    self.endWithError(tmpStr)
                    return False
                # list of sites already used
                sitesUsed.append(tmpJob.computingSite)
                self.putLog("used %s sites" % len(sitesUsed))
                # set candidates
                if len(sitesUsed) >= eventPickNumSites:
                    # reset candidates to limit the number of sites
                    allCandidates = sitesUsed
                    sitesUsed = []
                else:
                    # remove site
                    allCandidates.remove(tmpJob.computingSite)
            # send email notification for success
            tmpMsg = 'A transfer request was successfully sent to Rucio.\n'
            tmpMsg += 'Your task will get started once transfer is completed.'
            self.sendEmail(True,tmpMsg)
        try:
            # unlock and delete evp file
            fcntl.flock(self.evpFile.fileno(),fcntl.LOCK_UN)
            self.evpFile.close()
            os.remove(self.evpFileName)
        except:
            pass
        # successfully terminated
        self.putLog("end %s" % self.evpFileName)
        return True
    except:
        errType,errValue = sys.exc_info()[:2]
        self.endWithError('Got exception %s:%s %s' % (errType,errValue,traceback.format_exc()))
        return False
def main():
    logger.info('Getting tasks with status send and running')
    # tasks_list = Task.objects.all().filter(Q(status='send') | Q(status='running'))
    tasks_list = Task.objects.all().filter(name='dvcs2016P09t2r13v1_mu+')
    logger.info('Got list of %s tasks' % len(tasks_list))

    for t in tasks_list:
        logger.info('Getting jobs in status defined or failed for task %s' % t)
        jobs_list_count = Job.objects.all().filter(task=t).count()
        if jobs_list_count > 50:
            jobs_list = Job.objects.all().filter(
                task=t).order_by('id')[:max_send_amount]
        else:
            jobs_list = Job.objects.all().filter(
                task=t).order_by('id')[:jobs_list_count]
        logger.info('Got list of %s jobs' % len(jobs_list))

        i = 0
        for j in jobs_list:
            if i >= max_send_amount:
                break

            logger.info('Going to send job %s of %s task' % (j.file, j.task.name))

            umark = commands.getoutput('uuidgen')
            datasetName = 'panda.destDB.%s' % umark
            destName = 'COMPASSPRODDISK'  # PanDA will not try to move output data, data will be placed by pilot (based on schedconfig)
            TMPRAWFILE = j.file[j.file.rfind('/') + 1:]
            logger.info(TMPRAWFILE)
            TMPMDSTFILE = 'mDST-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.root' % {
                'input_file': j.file,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            logger.info(TMPMDSTFILE)
            TMPHISTFILE = '%(runNumber)s-%(runChunk)s-%(prodSlt)s.root' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(TMPHISTFILE)
            TMPRICHFILE = 'gfile_%(runNumber)s-%(runChunk)s.gfile' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(TMPRICHFILE)
            EVTDUMPFILE = 'evtdump%(prodSlt)s-%(runChunk)s-%(runNumber)s.raw' % {
                'prodSlt': j.task.prodslt,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(EVTDUMPFILE)
            STDOUTFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stdout' % {
                'prodNameOnly': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDOUTFILE)
            STDERRFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stderr' % {
                'prodNameOnly': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDERRFILE)
            try:
                file_year = j.file.split('/')[5]
                logger.info(file_year)
            except:
                logger.error('Error while splitting file to get year')
                sys.exit(1)

            ProdPathAndName = j.task.home + j.task.path + j.task.soft

            job = JobSpec()
            job.taskID = j.task.id
            job.jobDefinitionID = 0
            job.jobName = '%(prodName)s-%(fileYear)s--%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s' % {
                'prodName': j.task.soft,
                'fileYear': file_year,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            job.transformation = j.task.type  # payload (can be URL as well)
            job.destinationDBlock = datasetName
            job.destinationSE = destName
            job.currentPriority = 2000
            job.prodSourceLabel = 'prod_test'
            job.computingSite = site
            job.attemptNr = j.attempt + 1
            job.maxAttempt = j.task.max_attempts
            if j.status == 'failed':
                job.parentID = j.panda_id
            head, tail = os.path.split(j.file)
            # job.transferType = 'direct'
            job.sourceSite = 'CERN_COMPASS_PROD'

            # logs, and all files generated during execution will be placed in log (except output file)
            #job.jobParameters='source /afs/cern.ch/project/eos/installation/compass/etc/setup.sh;export EOS_MGM_URL=root://eoscompass.cern.ch;export PATH=/afs/cern.ch/project/eos/installation/compass/bin:$PATH;ppwd=$(pwd);echo $ppwd;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;coralpath=%(ProdPathAndName)s/coral;echo $coralpath;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";echo $coralpathsetup;source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/template.opt;xrdcp -np $ppwd/%(TMPMDSTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/mDST/%(TMPMDSTFILE)s;xrdcp -np $ppwd/%(TMPHISTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/histos/%(TMPHISTFILE)s;metadataxml=$(ls metadata-*);echo $metadataxml;cp $metadataxml $metadataxml.PAYLOAD;' % {'TMPMDSTFILE': TMPMDSTFILE, 'TMPHISTFILE': TMPHISTFILE, 'TMPRICHFILE': TMPRICHFILE, 'input_file': input_file, 'ProdPathAndName': ProdPathAndName, 'prodName': prodName}
            job.jobParameters = 'export EOS_MGM_URL=root://eoscompass.cern.ch;ppwd=$(pwd);export COMPASS_SW_PREFIX=/eos/experiment/compass/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;xrdcp -N -f root://castorpublic.cern.ch/%(input_file)s\?svcClass=compasscdr .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/%(template)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;rm %(tail)s' % {
                'TMPRAWFILE': TMPRAWFILE,
                'TMPMDSTFILE': TMPMDSTFILE,
                'TMPHISTFILE': TMPHISTFILE,
                'TMPRICHFILE': TMPRICHFILE,
                'input_file': j.file,
                'ProdPathAndName': ProdPathAndName,
                'prodPath': j.task.path,
                'prodName': j.task.soft,
                'template': j.task.template,
                'tail': tail,
                'prodSlt': j.task.prodslt,
                'EVTDUMPFILE': EVTDUMPFILE,
                'STDOUTFILE': STDOUTFILE,
                'STDERRFILE': STDERRFILE
            }

            fileIRaw = FileSpec()
            fileIRaw.lfn = "%s" % (j.file)
            fileIRaw.GUID = '5874a461-61d3-4543-8f34-6fd7a4624e78'
            fileIRaw.fsize = 1073753368
            fileIRaw.checksum = '671608be'
            fileIRaw.destinationDBlock = job.destinationDBlock
            fileIRaw.destinationSE = job.destinationSE
            fileIRaw.dataset = job.destinationDBlock
            fileIRaw.type = 'input'
            job.addFile(fileIRaw)

            fileOstdout = FileSpec()
            fileOstdout.lfn = "payload_stdout.txt"
            fileOstdout.destinationDBlock = job.destinationDBlock
            fileOstdout.destinationSE = job.destinationSE
            fileOstdout.dataset = job.destinationDBlock
            fileOstdout.type = 'output'
            job.addFile(fileOstdout)

            fileOstderr = FileSpec()
            fileOstderr.lfn = "payload_stderr.txt"
            fileOstderr.destinationDBlock = job.destinationDBlock
            fileOstderr.destinationSE = job.destinationSE
            fileOstderr.dataset = job.destinationDBlock
            fileOstderr.type = 'output'
            job.addFile(fileOstderr)

            fileOLog = FileSpec()
            fileOLog.lfn = "%(prodName)s-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.job.log.tgz" % {
                'prodName': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            fileOLog.destinationDBlock = job.destinationDBlock
            fileOLog.destinationSE = job.destinationSE
            fileOLog.dataset = job.destinationDBlock
            fileOLog.type = 'log'
            job.addFile(fileOLog)

            fileOmDST = FileSpec()
            fileOmDST.lfn = "%s" % (TMPMDSTFILE)
            fileOmDST.destinationDBlock = job.destinationDBlock
            fileOmDST.destinationSE = job.destinationSE
            fileOmDST.dataset = job.destinationDBlock
            fileOmDST.type = 'output'
            job.addFile(fileOmDST)
            fileOTrafdic = FileSpec()
            fileOTrafdic.lfn = "%s" % (TMPHISTFILE)
            fileOTrafdic.destinationDBlock = job.destinationDBlock
            fileOTrafdic.destinationSE = job.destinationSE
            fileOTrafdic.dataset = job.destinationDBlock
            fileOTrafdic.type = 'output'
            job.addFile(fileOTrafdic)

            fileOtestevtdump = FileSpec()
            fileOtestevtdump.lfn = "testevtdump.raw"
            fileOtestevtdump.destinationDBlock = job.destinationDBlock
            fileOtestevtdump.destinationSE = job.destinationSE
            fileOtestevtdump.dataset = job.destinationDBlock
            fileOtestevtdump.type = 'output'
            job.addFile(fileOtestevtdump)

            s, o = Client.submitJobs([job], srvID=aSrvID)
            logger.info(s)
            logger.info(o)
            # for x in o:
            #     logger.info("PandaID=%s" % x[0])
            #     today = datetime.datetime.today()
            #
            #     if x[0] != 0 and x[0] != 'NULL':
            #         j_update = Job.objects.get(id=j.id)
            #         j_update.panda_id = x[0]
            #         j_update.status = 'sent'
            #         j_update.attempt = j_update.attempt + 1
            #         j_update.date_updated = today
            #
            #         try:
            #             j_update.save()
            #             logger.info('Job %s with PandaID %s updated at %s' % (j.id, x[0], today))
            #         except IntegrityError as e:
            #             logger.exception('Unique together catched, was not saved')
            #         except DatabaseError as e:
            #             logger.exception('Something went wrong while saving: %s' % e.message)
            #     else:
            #         logger.info('Job %s was not added to PanDA' % j.id)
            i += 1

    logger.info('done')
def main():
    logger.info('Getting tasks with status send and running')
    tasks_list = Task.objects.all().filter(
        Q(status='send') | Q(status='running'))
    #tasks_list = Task.objects.all().filter(name='dvcs2017align7_mu-')
    logger.info('Got list of %s tasks' % len(tasks_list))

    cdbServerArr = ['compassvm23.cern.ch', 'compassvm24.cern.ch']
    cdbServer = cdbServerArr[0]

    for t in tasks_list:
        max_send_amount = 1000

        logger.info('Getting jobs in status staged or failed for task %s' % t)
        jobs_list_count = Job.objects.all().filter(task=t).filter(
            attempt__lt=t.max_attempts).filter(
                Q(status='staged') | Q(status='failed')).count()
        if jobs_list_count > 50:
            jobs_list = Job.objects.all().filter(task=t).filter(
                attempt__lt=t.max_attempts).filter(
                    Q(status='staged') | Q(status='failed')).order_by(
                        '-number_of_events')[:max_send_amount]
        else:
            jobs_list = Job.objects.all().filter(task=t).filter(
                attempt__lt=t.max_attempts).filter(
                    Q(status='staged') | Q(status='failed')).order_by(
                        '-number_of_events')[:jobs_list_count]
        logger.info('Got list of %s jobs' % len(jobs_list))
        # jobs_list = Job.objects.all().filter(task=t).filter(file='/castor/cern.ch/compass/data/2017/raw/W04/cdr12116-278485.raw')

        i = 0
        for j in jobs_list:
            if j.attempt >= j.task.max_attempts:
                logger.info(
                    'Number of retry attempts has reached for job %s of task %s' %
                    (j.file, j.task.name))
                continue

            if i > max_send_amount:
                break

            logger.info('Job %s of %s' % (i, max_send_amount))
            logger.info('Going to send job %s of %s task' % (j.file, j.task.name))

            umark = commands.getoutput('uuidgen')
            datasetName = 'panda.destDB.%s' % umark
            destName = 'local'  # PanDA will not try to move output data, data will be placed by pilot (based on schedconfig)
            TMPRAWFILE = j.file[j.file.rfind('/') + 1:]
            logger.info(TMPRAWFILE)
            TMPMDSTFILE = 'mDST-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.root' % {
                'input_file': j.file,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            logger.info(TMPMDSTFILE)
            TMPHISTFILE = '%(runNumber)s-%(runChunk)s-%(prodSlt)s.root' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(TMPHISTFILE)
            TMPRICHFILE = 'gfile_%(runNumber)s-%(runChunk)s.gfile' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(TMPRICHFILE)
            EVTDUMPFILE = 'evtdump%(prodSlt)s-%(runChunk)s-%(runNumber)s.raw' % {
                'prodSlt': j.task.prodslt,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(EVTDUMPFILE)
            STDOUTFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stdout' % {
                'prodNameOnly': j.task.production,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDOUTFILE)
            STDERRFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stderr' % {
                'prodNameOnly': j.task.production,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDERRFILE)
            PRODSOFT = j.task.soft
            logger.info(PRODSOFT)

            ProdPathAndName = j.task.home + j.task.path + j.task.soft

            job = JobSpec()
            job.VO = 'vo.compass.cern.ch'
            job.taskID = j.task.id
            job.jobDefinitionID = 0
            job.jobName = '%(prodName)s-%(fileYear)s--%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s' % {
                'prodName': j.task.production,
                'fileYear': j.task.year,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            job.transformation = j.task.type  # payload (can be URL as well)
            job.destinationDBlock = datasetName
            job.destinationSE = destName
            job.currentPriority = 2000
            if j.task.type == 'DDD filtering':
                job.currentPriority = 1000
            job.prodSourceLabel = 'prod_test'
            job.computingSite = j.task.site
            job.attemptNr = j.attempt + 1
            job.maxAttempt = j.task.max_attempts
            if j.status == 'failed':
                job.parentID = j.panda_id
            head, tail = os.path.split(j.file)

            cdbServer = cdbServerArr[random.randrange(len(cdbServerArr))]

            # logs, and all files generated during execution will be placed in log (except output file)
            #job.jobParameters='source /afs/cern.ch/project/eos/installation/compass/etc/setup.sh;export EOS_MGM_URL=root://eoscompass.cern.ch;export PATH=/afs/cern.ch/project/eos/installation/compass/bin:$PATH;ppwd=$(pwd);echo $ppwd;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;coralpath=%(ProdPathAndName)s/coral;echo $coralpath;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";echo $coralpathsetup;source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/template.opt;xrdcp -np $ppwd/%(TMPMDSTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/mDST/%(TMPMDSTFILE)s;xrdcp -np $ppwd/%(TMPHISTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/histos/%(TMPHISTFILE)s;metadataxml=$(ls metadata-*);echo $metadataxml;cp $metadataxml $metadataxml.PAYLOAD;' % {'TMPMDSTFILE': TMPMDSTFILE, 'TMPHISTFILE': TMPHISTFILE, 'TMPRICHFILE': TMPRICHFILE, 'input_file': input_file, 'ProdPathAndName': ProdPathAndName, 'prodName': prodName}
            if j.task.type == 'test production' or j.task.type == 'mass production' or j.task.type == 'technical production':
                if j.task.site == 'BW_COMPASS_MCORE':
                    job.jobParameters = 'ppwd=$(pwd);export COMPASS_SW_PREFIX=/scratch/sciteam/criedl/projectdata/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;export PRODSOFT=%(PRODSOFT)s;cp %(input_file)s .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/%(template)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;gzip payload_stderr.out;gzip payload_stdout.out;rm %(tail)s' % {
                        'TMPRAWFILE': TMPRAWFILE,
                        'TMPMDSTFILE': TMPMDSTFILE,
                        'TMPHISTFILE': TMPHISTFILE,
                        'TMPRICHFILE': TMPRICHFILE,
                        'PRODSOFT': PRODSOFT,
                        'input_file': j.file,
                        'ProdPathAndName': ProdPathAndName,
                        'prodPath': j.task.path,
                        'prodName': j.task.production,
                        'template': j.task.template,
                        'tail': tail,
                        'prodSlt': j.task.prodslt,
                        'EVTDUMPFILE': EVTDUMPFILE,
                        'STDOUTFILE': STDOUTFILE,
                        'STDERRFILE': STDERRFILE
                    }
                else:
                    job.jobParameters = 'export EOS_MGM_URL=root://eoscompass.cern.ch;ppwd=$(pwd);export COMPASS_SW_PREFIX=/eos/experiment/compass/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;export PRODSOFT=%(PRODSOFT)s;xrdcp -N -f root://castorpublic.cern.ch/%(input_file)s\?svcClass=compasscdr .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;export CDBSERVER=%(cdbServer)s;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/%(template)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;gzip payload_stderr.out;gzip payload_stdout.out;rm %(tail)s' % {
                        'TMPRAWFILE': TMPRAWFILE,
                        'TMPMDSTFILE': TMPMDSTFILE,
                        'TMPHISTFILE': TMPHISTFILE,
                        'TMPRICHFILE': TMPRICHFILE,
                        'PRODSOFT': PRODSOFT,
                        'input_file': j.file,
                        'ProdPathAndName': ProdPathAndName,
                        'prodPath': j.task.path,
                        'prodName': j.task.production,
                        'template': j.task.template,
                        'tail': tail,
                        'prodSlt': j.task.prodslt,
                        'EVTDUMPFILE': EVTDUMPFILE,
                        'STDOUTFILE': STDOUTFILE,
                        'STDERRFILE': STDERRFILE,
                        'cdbServer': cdbServer
                    }
            if j.task.type == 'DDD filtering':
                job.jobParameters = 'export EOS_MGM_URL=root://eoscompass.cern.ch;ppwd=$(pwd);export COMPASS_SW_PREFIX=/eos/experiment/compass/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;export PRODSOFT=%(PRODSOFT)s;xrdcp -N -f root://castorpublic.cern.ch/%(input_file)s\?svcClass=compasscdr .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;$CORAL/src/DaqDataDecoding/examples/how-to/ddd --filter-CAL --out=testevtdump.raw %(TMPRAWFILE)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;gzip payload_stderr.out;gzip payload_stdout.out;rm %(tail)s' % {
                    'TMPRAWFILE': TMPRAWFILE,
                    'TMPMDSTFILE': TMPMDSTFILE,
                    'TMPHISTFILE': TMPHISTFILE,
                    'TMPRICHFILE': TMPRICHFILE,
                    'PRODSOFT': PRODSOFT,
                    'input_file': j.file,
                    'ProdPathAndName': ProdPathAndName,
                    'prodPath': j.task.path,
                    'prodName': j.task.production,
                    'template': j.task.template,
                    'tail': tail,
                    'prodSlt': j.task.prodslt,
                    'EVTDUMPFILE': EVTDUMPFILE,
                    'STDOUTFILE': STDOUTFILE,
                    'STDERRFILE': STDERRFILE
                }

            # fileIRaw = FileSpec()
            # fileIRaw.lfn = "%s" % (input_file)
            # fileIRaw.destinationDBlock = job.destinationDBlock
            # fileIRaw.destinationSE = job.destinationSE
            # fileIRaw.dataset = job.destinationDBlock
            # fileIRaw.type = 'input'
            # job.addFile(fileIRaw)

            fileOstdout = FileSpec()
            fileOstdout.lfn = "payload_stdout.out.gz"
            fileOstdout.destinationDBlock = job.destinationDBlock
            fileOstdout.destinationSE = job.destinationSE
            fileOstdout.dataset = job.destinationDBlock
            fileOstdout.type = 'output'
            job.addFile(fileOstdout)

            fileOstderr = FileSpec()
            fileOstderr.lfn = "payload_stderr.out.gz"
            fileOstderr.destinationDBlock = job.destinationDBlock
            fileOstderr.destinationSE = job.destinationSE
            fileOstderr.dataset = job.destinationDBlock
            fileOstderr.type = 'output'
            job.addFile(fileOstderr)

            fileOLog = FileSpec()
            fileOLog.lfn = "%(prodName)s-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.job.log.tgz" % {
                'prodName': j.task.production,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            fileOLog.destinationDBlock = job.destinationDBlock
            fileOLog.destinationSE = job.destinationSE
            fileOLog.dataset = job.destinationDBlock
            fileOLog.type = 'log'
            job.addFile(fileOLog)

            if j.task.type == 'test production' or j.task.type == 'mass production' or j.task.type == 'technical production':
                fileOmDST = FileSpec()
                fileOmDST.lfn = "%s" % (TMPMDSTFILE)
                fileOmDST.destinationDBlock = job.destinationDBlock
                fileOmDST.destinationSE = job.destinationSE
                fileOmDST.dataset = job.destinationDBlock
                fileOmDST.type = 'output'
                job.addFile(fileOmDST)

                fileOTrafdic = FileSpec()
                fileOTrafdic.lfn = "%s" % (TMPHISTFILE)
                fileOTrafdic.destinationDBlock = job.destinationDBlock
                fileOTrafdic.destinationSE = job.destinationSE
                fileOTrafdic.dataset = job.destinationDBlock
                fileOTrafdic.type = 'output'
                job.addFile(fileOTrafdic)

            if j.task.type == 'test production' or j.task.type == 'mass production' or j.task.type == 'technical production' or j.task.type == 'DDD filtering':
                fileOtestevtdump = FileSpec()
                fileOtestevtdump.lfn = "testevtdump.raw"
                fileOtestevtdump.destinationDBlock = job.destinationDBlock
                fileOtestevtdump.destinationSE = job.destinationSE
                fileOtestevtdump.dataset = job.destinationDBlock
                fileOtestevtdump.type = 'output'
                job.addFile(fileOtestevtdump)

            s, o = Client.submitJobs([job], srvID=aSrvID)
            logger.info(s)
            for x in o:
                logger.info("PandaID=%s" % x[0])
                if x[0] != 0 and x[0] != 'NULL':
                    j_update = Job.objects.get(id=j.id)
                    j_update.panda_id = x[0]
                    j_update.status = 'sent'
                    j_update.attempt = j_update.attempt + 1
                    j_update.date_updated = timezone.now()

                    try:
                        j_update.save()
                        logger.info('Job %s with PandaID %s updated at %s' %
                                    (j.id, x[0], timezone.now()))

                        if j_update.task.status == 'send':
                            logger.info(
                                'Going to update status of task %s from send to running' %
                                j_update.task.name)
                            t_update = Task.objects.get(id=j_update.task.id)
                            t_update.status = 'running'
                            t_update.date_updated = timezone.now()
                            try:
                                t_update.save()
                                logger.info('Task %s updated' % t_update.name)
                            except IntegrityError as e:
                                logger.exception(
                                    'Unique together catched, was not saved')
                            except DatabaseError as e:
                                logger.exception(
                                    'Something went wrong while saving: %s' % e.message)
                    except IntegrityError as e:
                        logger.exception(
                            'Unique together catched, was not saved')
                    except DatabaseError as e:
                        logger.exception(
                            'Something went wrong while saving: %s' % e.message)
                else:
                    logger.info('Job %s was not added to PanDA' % j.id)
            i += 1

    logger.info('done')
import time
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = commands.getoutput('/usr/bin/uuidgen')
job.AtlasRelease = 'Atlas-9.0.4'
job.prodDBlock = 'pandatest.000003.dd.input'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput(
    '/usr/bin/uuidgen')
job.destinationSE = 'BNL_SE'

ids = {
    'pandatest.000003.dd.input._00028.junk':
    '6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27',
    'pandatest.000003.dd.input._00033.junk':
    '98f79ba1-1793-4253-aac7-bdf90a51d1ee',
    'pandatest.000003.dd.input._00039.junk':
    '33660dd5-7cef-422a-a7fc-6c24cb10deb1'
}
for lfn in ids.keys():
    file = FileSpec()
    file.lfn = lfn
    file.GUID = ids[file.lfn]
    file.dataset = 'pandatest.000003.dd.input'
    file.type = 'input'
    job.addFile(file)
def master_prepare(self, app, appconfig): '''Prepare the master job''' from pandatools import Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.')) # set chirp variables if configPanda['chirpconfig'] or configPanda['chirpserver']: setChirpVariables() # Pack inputsandbox inputsandbox = 'sources.%s.tar' % commands.getoutput( 'uuidgen 2> /dev/null') inpw = job.getInputWorkspace() # add user script to inputsandbox if hasattr(job.application.exe, "name"): if not job.application.exe in job.inputsandbox: job.inputsandbox.append(job.application.exe) for fname in [f.name for f in job.inputsandbox]: fname.rstrip(os.sep) path = fname[:fname.rfind(os.sep)] f = fname[fname.rfind(os.sep) + 1:] rc, output = commands.getstatusoutput( 'tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f)) if rc: logger.error('Packing inputsandbox failed with status %d', rc) logger.error(output) raise ApplicationConfigurationError( None, 'Packing inputsandbox failed.') if len(job.inputsandbox) > 0: rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox))) if rc: logger.error('Packing inputsandbox failed with status %d', rc) logger.error(output) raise ApplicationConfigurationError( None, 'Packing inputsandbox failed.') inputsandbox += ".gz" else: inputsandbox = None # Upload Inputsandbox if inputsandbox: logger.debug('Uploading source tarball ...') uploadSources(inpw.getPath(), os.path.basename(inputsandbox)) self.inputsandbox = inputsandbox else: self.inputsandbox = None # input dataset if job.inputdata: if job.inputdata._name != 'DQ2Dataset': raise ApplicationConfigurationError( None, 'PANDA application supports only DQ2Datasets') # run brokerage here if not splitting if not job.splitter: from GangaPanda.Lib.Panda.Panda import runPandaBrokerage runPandaBrokerage(job) elif job.splitter._name not in [ 'DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask' ]: raise ApplicationConfigurationError( None, 'Panda splitter must be DQ2JobSplitter or ArgSplitter') if job.backend.site == 'AUTO': raise ApplicationConfigurationError( None, 'site is still AUTO after brokerage!') # output dataset if job.outputdata: if job.outputdata._name != 'DQ2OutputDataset': raise ApplicationConfigurationError( None, 'Panda backend supports only DQ2OutputDataset') else: logger.info('Adding missing DQ2OutputDataset') job.outputdata = DQ2OutputDataset() job.outputdata.datasetname, outlfn = dq2outputdatasetname( job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname) self.outDsLocation = Client.PandaSites[job.backend.site]['ddm'] try: Client.addDataset(job.outputdata.datasetname, False, location=self.outDsLocation) logger.info('Output dataset %s registered at %s' % (job.outputdata.datasetname, self.outDsLocation)) dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation) except exceptions.SystemExit: raise BackendError( 'Panda', 'Exception in Client.addDataset %s: %s %s' % (job.outputdata.datasetname, sys.exc_info()[0], sys.exc_info()[1])) # handle the libds if job.backend.libds: self.libDataset = job.backend.libds self.fileBO = getLibFileSpecFromLibDS(self.libDataset) self.library = self.fileBO.lfn elif job.backend.bexec: self.libDataset = job.outputdata.datasetname + '.lib' self.library = '%s.tgz' % self.libDataset try: Client.addDataset(self.libDataset, False, location=self.outDsLocation) dq2_set_dataset_lifetime(self.libDataset, 
location=self.outDsLocation) logger.info('Lib dataset %s registered at %s' % (self.libDataset, self.outDsLocation)) except exceptions.SystemExit: raise BackendError( 'Panda', 'Exception in Client.addDataset %s: %s %s' % (self.libDataset, sys.exc_info()[0], sys.exc_info()[1])) # collect extOutFiles self.extOutFile = [] for tmpName in job.outputdata.outputdata: if tmpName != '': self.extOutFile.append(tmpName) for tmpName in job.outputsandbox: if tmpName != '': self.extOutFile.append(tmpName) for tmpName in job.backend.extOutFile: if tmpName != '': self.extOutFile.append(tmpName) # create build job if job.backend.bexec != '': jspec = JobSpec() jspec.jobDefinitionID = job.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.transformation = '%s/buildGen-00-00-01' % Client.baseURLSUB if Client.isDQ2free(job.backend.site): jspec.destinationDBlock = '%s/%s' % ( job.outputdata.datasetname, self.libDataset) jspec.destinationSE = 'local' else: jspec.destinationDBlock = self.libDataset jspec.destinationSE = job.backend.site jspec.prodSourceLabel = configPanda['prodSourceLabelBuild'] jspec.processingType = configPanda['processingType'] jspec.assignedPriority = configPanda['assignedPriorityBuild'] jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud jspec.jobParameters = '-o %s' % (self.library) if self.inputsandbox: jspec.jobParameters += ' -i %s' % (self.inputsandbox) else: raise ApplicationConfigurationError( None, 'Executable on Panda with build job defined, but inputsandbox is empty!' ) matchURL = re.search('(http.*://[^/]+)/', Client.baseURLCSRVSSL) if matchURL: jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1) if job.backend.bexec != '': jspec.jobParameters += ' --bexec "%s" ' % urllib.quote( job.backend.bexec) jspec.jobParameters += ' -r %s ' % '.' fout = FileSpec() fout.lfn = self.library fout.type = 'output' fout.dataset = self.libDataset fout.destinationDBlock = self.libDataset jspec.addFile(fout) flog = FileSpec() flog.lfn = '%s.log.tgz' % self.libDataset flog.type = 'log' flog.dataset = self.libDataset flog.destinationDBlock = self.libDataset jspec.addFile(flog) return jspec else: return None
from taskbuffer.FileSpec import FileSpec aSrvID = None for idx,argv in enumerate(sys.argv): if argv == '-s': aSrvID = sys.argv[idx+1] sys.argv = sys.argv[:idx] break site = sys.argv[1] datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') destName = None job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s" % commands.getoutput('uuidgen') job.transformation = 'https://atlpan.web.cern.ch/atlpan/test.sh' job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = 1000 job.prodSourceLabel = 'test' job.computingSite = site job.jobParameters="aaaaa" fileOL = FileSpec() fileOL.lfn = "%s.job.log.tgz" % job.jobName fileOL.destinationDBlock = job.destinationDBlock fileOL.destinationSE = job.destinationSE
import time import commands import userinterface.Client as Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = commands.getoutput('/usr/bin/uuidgen') job.AtlasRelease = 'Atlas-9.0.4' job.prodDBlock = 'pandatest.000003.dd.input' job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('/usr/bin/uuidgen') job.destinationSE = 'BNL_SE' ids = {'pandatest.000003.dd.input._00028.junk':'6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27', 'pandatest.000003.dd.input._00033.junk':'98f79ba1-1793-4253-aac7-bdf90a51d1ee', 'pandatest.000003.dd.input._00039.junk':'33660dd5-7cef-422a-a7fc-6c24cb10deb1'} for lfn in ids.keys(): file = FileSpec() file.lfn = lfn file.GUID = ids[file.lfn] file.dataset = 'pandatest.000003.dd.input' file.type = 'input' job.addFile(file) s,o = Client.submitJobs([job]) print "---------------------" print s print o print "---------------------" s,o = Client.getJobStatus([4934, 4766, 4767, 4768, 4769])
cloud = sys.argv[2] prodDBlock = sys.argv[3] inputFile = sys.argv[4] datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') files = { inputFile: None, } jobList = [] index = 0 for lfn in files.keys(): index += 1 job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index) job.AtlasRelease = 'Atlas-16.6.2' job.homepackage = 'AtlasProduction/16.6.2.1' job.transformation = 'AtlasG4_trf.py' job.destinationDBlock = datasetName job.computingSite = site job.prodDBlock = prodDBlock job.prodSourceLabel = 'test' job.processingType = 'test' job.currentPriority = 10000 job.cloud = cloud job.cmtConfig = 'i686-slc5-gcc43-opt'
import commands import userinterface.Client as Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec if len(sys.argv) > 1: site = sys.argv[1] else: site = None jobList = [] for i in range(2): datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') destName = 'ANALY_BNL_ATLAS_1' job = JobSpec() job.jobDefinitionID = 1 job.jobName = commands.getoutput('uuidgen') job.AtlasRelease = 'Atlas-12.0.2' job.homepackage = 'AnalysisTransforms' job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena2' job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = 3000 job.prodSourceLabel = 'user' job.computingSite = site job.prodDBlock = 'testIdeal_06.005001.pythia_minbias.recon.AOD.v12000103' fileOL = FileSpec() fileOL.lfn = "%s.job.log.tgz" % commands.getoutput('uuidgen') fileOL.destinationDBlock = job.destinationDBlock
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig): """Prepare the specific aspec of each subjob. Returns: subjobconfig list of objects understood by backends.""" from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs # make sure we have the correct siteType refreshPandaSpecs() job = app._getParent() masterjob = job._getRoot() logger.debug('ProdTransPandaRTHandler prepare called for %s', job.getFQID('.')) job.backend.actualCE = job.backend.site job.backend.requirements.cloud = Client.PandaSites[ job.backend.site]['cloud'] # check that the site is in a submit-able status if not job.splitter or job.splitter._name != 'DQ2JobSplitter': allowed_sites = job.backend.list_ddm_sites() try: outDsLocation = Client.PandaSites[job.backend.site]['ddm'] tmpDsExist = False if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')): #if Client.getDatasets(job.outputdata.datasetname): if getDatasets(job.outputdata.datasetname): tmpDsExist = True logger.info('Re-using output dataset %s' % job.outputdata.datasetname) if not configPanda[ 'specialHandling'] == 'ddm:rucio' and not configPanda[ 'processingType'].startswith( 'gangarobot' ) and not configPanda['processingType'].startswith( 'hammercloud') and not configPanda[ 'processingType'].startswith('rucio_test'): Client.addDataset(job.outputdata.datasetname, False, location=outDsLocation, allowProdDisk=True, dsExist=tmpDsExist) logger.info('Output dataset %s registered at %s' % (job.outputdata.datasetname, outDsLocation)) dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation) except exceptions.SystemExit: raise BackendError( 'Panda', 'Exception in adding dataset %s: %s %s' % (job.outputdata.datasetname, sys.exc_info()[0], sys.exc_info()[1])) # JobSpec. jspec = JobSpec() jspec.currentPriority = app.priority jspec.jobDefinitionID = masterjob.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.coreCount = app.core_count jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release jspec.homepackage = app.home_package jspec.transformation = app.transformation jspec.destinationDBlock = job.outputdata.datasetname if job.outputdata.location: jspec.destinationSE = job.outputdata.location else: jspec.destinationSE = job.backend.site if job.inputdata: jspec.prodDBlock = job.inputdata.dataset[0] else: jspec.prodDBlock = 'NULL' if app.prod_source_label: jspec.prodSourceLabel = app.prod_source_label else: jspec.prodSourceLabel = configPanda['prodSourceLabelRun'] jspec.processingType = configPanda['processingType'] jspec.specialHandling = configPanda['specialHandling'] jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud jspec.cmtConfig = app.atlas_cmtconfig if app.dbrelease == 'LATEST': try: latest_dbrelease = getLatestDBReleaseCaching() except: from pandatools import Client latest_dbrelease = Client.getLatestDBRelease() m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease) if m: self.dbrelease_dataset = m.group(1) self.dbrelease = m.group(2) else: raise ApplicationConfigurationError( None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually." 
) else: self.dbrelease_dataset = app.dbrelease_dataset self.dbrelease = app.dbrelease jspec.jobParameters = app.job_parameters if self.dbrelease: if self.dbrelease == 'current': jspec.jobParameters += ' --DBRelease=current' else: if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % ( self.dbrelease, ) else: jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % ( self.dbrelease, ) dbspec = FileSpec() dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease dbspec.dataset = self.dbrelease_dataset dbspec.prodDBlock = jspec.prodDBlock dbspec.type = 'input' jspec.addFile(dbspec) if job.inputdata: m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)', job.inputdata.dataset[0]) if not m: logger.error("Error retrieving run number from dataset name") #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name") runnumber = 105200 else: runnumber = int(m.group(2)) if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --runNumber %d' % runnumber else: jspec.jobParameters += ' RunNumber=%d' % runnumber # Output files. randomized_lfns = [] ilfn = 0 for lfn, lfntype in zip(app.output_files, app.output_type): ofspec = FileSpec() if app.randomize_lfns: randomized_lfn = lfn + ( '.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4])) else: randomized_lfn = lfn ofspec.lfn = randomized_lfn randomized_lfns.append(randomized_lfn) ofspec.destinationDBlock = jspec.destinationDBlock ofspec.destinationSE = jspec.destinationSE ofspec.dataset = jspec.destinationDBlock ofspec.type = 'output' jspec.addFile(ofspec) if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --output%sFile %s' % ( lfntype, randomized_lfns[ilfn]) else: jspec.jobParameters += ' output%sFile=%s' % ( lfntype, randomized_lfns[ilfn]) ilfn = ilfn + 1 # Input files. if job.inputdata: for guid, lfn, size, checksum, scope in zip( job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes): ifspec = FileSpec() ifspec.lfn = lfn ifspec.GUID = guid ifspec.fsize = size ifspec.md5sum = checksum ifspec.scope = scope ifspec.dataset = jspec.prodDBlock ifspec.prodDBlock = jspec.prodDBlock ifspec.type = 'input' jspec.addFile(ifspec) if app.input_type: itype = app.input_type else: itype = m.group(5) if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join( job.inputdata.names)) else: jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join( job.inputdata.names)) # Log files. lfspec = FileSpec() lfspec.lfn = '%s.job.log.tgz' % jspec.jobName lfspec.destinationDBlock = jspec.destinationDBlock lfspec.destinationSE = jspec.destinationSE lfspec.dataset = jspec.destinationDBlock lfspec.type = 'log' jspec.addFile(lfspec) return jspec
def master_prepare(self,app,appmasterconfig): # PandaTools from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaMCPandaRTHandler master_prepare called for %s', job.getFQID('.')) usertag = configDQ2['usertag'] #usertag='user09' nickname = getNickname(allowMissingNickname=True) self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (usertag,nickname,commands.getoutput('hostname').split('.')[0],int(time.time()),job.id) # self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity()) sources = 'sources.%s.tar.gz' % commands.getoutput('uuidgen 2> /dev/null') self.library = '%s.lib.tgz' % self.libDataset # check DBRelease # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1: # raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'") # unpack library logger.debug('Creating source tarball ...') tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null') os.mkdir(tmpdir) inputbox=[] if os.path.exists(app.transform_archive): # must add a condition on size. inputbox += [ File(app.transform_archive) ] if app.evgen_job_option: self.evgen_job_option=app.evgen_job_option if os.path.exists(app.evgen_job_option): # locally modified job option file to add to the input sand box inputbox += [ File(app.evgen_job_option) ] self.evgen_job_option=app.evgen_job_option.split("/")[-1] # add input sandbox files if (job.inputsandbox): for file in job.inputsandbox: inputbox += [ file ] # add option files for extFile in job.backend.extOutFile: try: shutil.copy(extFile,tmpdir) except IOError: os.makedirs(tmpdir) shutil.copy(extFile,tmpdir) # fill the archive for opt_file in inputbox: try: shutil.copy(opt_file.name,tmpdir) except IOError: os.makedirs(tmpdir) shutil.copy(opt_file.name,tmpdir) # now tar it up again inpw = job.getInputWorkspace() rc, output = commands.getstatusoutput('tar czf %s -C %s .' % (inpw.getPath(sources),tmpdir)) if rc: logger.error('Packing sources failed with status %d',rc) logger.error(output) raise ApplicationConfigurationError(None,'Packing sources failed.') shutil.rmtree(tmpdir) # upload sources logger.debug('Uploading source tarball ...') try: cwd = os.getcwd() os.chdir(inpw.getPath()) rc, output = Client.putFile(sources) if output != 'True': logger.error('Uploading sources %s failed. Status = %d', sources, rc) logger.error(output) raise ApplicationConfigurationError(None,'Uploading archive failed') finally: os.chdir(cwd) # Use Panda's brokerage ## if job.inputdata and len(app.sites)>0: ## # update cloud, use inputdata's ## from dq2.info.TiersOfATLAS import whichCloud,ToACache ## inclouds=[] ## for site in app.sites: ## cloudSite=whichCloud(app.sites[0]) ## if cloudSite not in inclouds: ## inclouds.append(cloudSite) ## # now converting inclouds content into proper brokering stuff. ## outclouds=[] ## for cloudSite in inclouds: ## for cloudID, eachCloud in ToACache.dbcloud.iteritems(): ## if cloudSite==eachCloud: ## cloud=cloudID ## outclouds.append(cloud) ## break ## print outclouds ## # finally, matching with user's wishes ## if len(outclouds)>0: ## if not job.backend.requirements.cloud: # no user wish, update ## job.backend.requirements.cloud=outclouds[0] ## else: ## try: ## assert job.backend.requirements.cloud in outclouds ## except: ## raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. 
Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds))) from GangaPanda.Lib.Panda.Panda import runPandaBrokerage runPandaBrokerage(job) if job.backend.site == 'AUTO': raise ApplicationConfigurationError(None,'site is still AUTO after brokerage!') # output dataset preparation and registration try: outDsLocation = Client.PandaSites[job.backend.site]['ddm'] except: raise ApplicationConfigurationError(None,"Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site) if not app.dryrun: for outtype in app.outputpaths.keys(): dset=string.replace(app.outputpaths[outtype],"/",".") dset=dset[1:] # dataset registration must be done only once. print "registering output dataset %s at %s" % (dset,outDsLocation) try: Client.addDataset(dset,False,location=outDsLocation) dq2_set_dataset_lifetime(dset, location=outDsLocation) except: raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % dset) # extend registration to build job lib dataset: print "registering output dataset %s at %s" % (self.libDataset,outDsLocation) try: Client.addDataset(self.libDataset,False,location=outDsLocation) dq2_set_dataset_lifetime(self.libDataset, outDsLocation) except: raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % self.libDataset) ### cacheVer = "-AtlasProduction_" + str(app.prod_release) logger.debug("master job submit?") self.outsite=job.backend.site if app.se_name and app.se_name != "none" and not self.outsite: self.outsite=app.se_name # create build job jspec = JobSpec() jspec.jobDefinitionID = job.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel jspec.homepackage = 'AnalysisTransforms'+cacheVer#+nightVer jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare() jspec.destinationDBlock = self.libDataset jspec.destinationSE = job.backend.site jspec.prodSourceLabel = 'panda' jspec.assignedPriority = 2000 jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud # jspec.jobParameters = self.args not known yet jspec.jobParameters = '-o %s' % (self.library) if app.userarea: print app.userarea jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea)) else: jspec.jobParameters += ' -i %s' % (sources) jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel) matchURL = re.search('(http.*://[^/]+)/',Client.baseURLSSL) if matchURL: jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1) fout = FileSpec() fout.lfn = self.library fout.type = 'output' fout.dataset = self.libDataset fout.destinationDBlock = self.libDataset jspec.addFile(fout) flog = FileSpec() flog.lfn = '%s.log.tgz' % self.libDataset flog.type = 'log' flog.dataset = self.libDataset flog.destinationDBlock = self.libDataset jspec.addFile(flog) #print "MASTER JOB DETAILS:",jspec.jobParameters return jspec
site = sys.argv[1] cloud = 'CA' elif len(sys.argv) == 3: site = sys.argv[1] cloud = sys.argv[2] else: site = None cloud = None datasetName = 'panda.destDB.%s_tid999991' % commands.getoutput('uuidgen') taskid = 999989 jobList = [] for i in range(1): job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i) # job.AtlasRelease = 'Atlas-12.0.6' # job.homepackage = 'AtlasProduction/12.0.6.5' job.AtlasRelease = 'Atlas-12.0.7' job.homepackage = 'AtlasProduction/12.0.7.1' job.transformation = 'csc_evgen_trf.py' job.destinationDBlock = datasetName # job.destinationSE = destName # job.cloud = 'CA' job.cloud = cloud job.taskID = taskid job.currentPriority = 1000 job.prodSourceLabel = 'test'
def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig): '''prepare the subjob specific configuration''' from pandatools import Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.')) # in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter if job.inputdata and not job._getRoot().subjobs: if not job.inputdata.names: contents = job.inputdata.get_contents(overlap=False, size=True) for ds in contents.keys(): for f in contents[ds]: job.inputdata.guids.append( f[0] ) job.inputdata.names.append( f[1][0] ) job.inputdata.sizes.append( f[1][1] ) job.inputdata.checksums.append( f[1][2] ) job.inputdata.scopes.append( f[1][3] ) site = job._getRoot().backend.site job.backend.site = site job.backend.actualCE = site cloud = job._getRoot().backend.requirements.cloud job.backend.requirements.cloud = cloud # if no outputdata are given if not job.outputdata: job.outputdata = DQ2OutputDataset() job.outputdata.datasetname = job._getRoot().outputdata.datasetname #if not job.outputdata.datasetname: else: job.outputdata.datasetname = job._getRoot().outputdata.datasetname if not job.outputdata.datasetname: raise ApplicationConfigurationError('DQ2OutputDataset has no datasetname') jspec = JobSpec() jspec.jobDefinitionID = job._getRoot().id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.transformation = '%s/runGen-00-00-02' % Client.baseURLSUB if job.inputdata: jspec.prodDBlock = job.inputdata.dataset[0] else: jspec.prodDBlock = 'NULL' jspec.destinationDBlock = job.outputdata.datasetname if job.outputdata.location: if not job._getRoot().subjobs or job.id == 0: logger.warning('You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.') jspec.destinationSE = job.outputdata.location else: jspec.destinationSE = site jspec.prodSourceLabel = configPanda['prodSourceLabelRun'] jspec.processingType = configPanda['processingType'] jspec.assignedPriority = configPanda['assignedPriorityRun'] jspec.cloud = cloud # memory if job.backend.requirements.memory != -1: jspec.minRamCount = job.backend.requirements.memory # cputime if job.backend.requirements.cputime != -1: jspec.maxCpuCount = job.backend.requirements.cputime jspec.computingSite = site # library (source files) if job.backend.libds: flib = FileSpec() flib.lfn = self.fileBO.lfn flib.GUID = self.fileBO.GUID flib.type = 'input' flib.status = self.fileBO.status flib.dataset = self.fileBO.destinationDBlock flib.dispatchDBlock = self.fileBO.destinationDBlock jspec.addFile(flib) elif job.backend.bexec: flib = FileSpec() flib.lfn = self.library flib.type = 'input' flib.dataset = self.libDataset flib.dispatchDBlock = self.libDataset jspec.addFile(flib) # input files FIXME: many more input types if job.inputdata: for guid, lfn, size, checksum, scope in zip(job.inputdata.guids,job.inputdata.names,job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes): finp = FileSpec() finp.lfn = lfn finp.GUID = guid finp.scope = scope # finp.fsize = # finp.md5sum = finp.dataset = job.inputdata.dataset[0] finp.prodDBlock = job.inputdata.dataset[0] finp.dispatchDBlock = job.inputdata.dataset[0] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # output files # outMap = {} #FIXME: if options.outMeta != []: self.rundirectory = "." 
# log files flog = FileSpec() flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname flog.type = 'log' flog.dataset = job.outputdata.datasetname flog.destinationDBlock = job.outputdata.datasetname flog.destinationSE = job.backend.site jspec.addFile(flog) # job parameters param = '' # source URL matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL) srcURL = "" if matchURL != None: srcURL = matchURL.group(1) param += " --sourceURL %s " % srcURL param += '-r "%s" ' % self.rundirectory exe_name = job.application.exe if job.backend.bexec == '': if hasattr(job.application.exe, "name"): exe_name = os.path.basename(job.application.exe.name) # set jobO parameter if job.application.args: param += ' -j "" -p "%s %s" '%(exe_name,urllib.quote(" ".join(job.application.args))) else: param += ' -j "" -p "%s" '%exe_name if self.inputsandbox: param += ' -a %s '%self.inputsandbox else: param += '-l %s ' % self.library param += '-j "" -p "%s %s" ' % ( exe_name,urllib.quote(" ".join(job.application.args))) if job.inputdata: param += '-i "%s" ' % job.inputdata.names # fill outfiles outfiles = {} for f in self.extOutFile: tarnum = 1 if f.find('*') != -1: # archive * outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S") ) tarnum += 1 else: outfiles[f] = "%s.%s.%s" %(f, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S")) fout = FileSpec() fout.lfn = outfiles[f] fout.type = 'output' fout.dataset = job.outputdata.datasetname fout.destinationDBlock = job.outputdata.datasetname fout.destinationSE = job.backend.site jspec.addFile(fout) param += '-o "%s" ' % (outfiles) # must be double quotes, because python prints strings in 'single quotes' for file in jspec.Files: if file.type in [ 'output', 'log'] and configPanda['chirpconfig']: file.dispatchDBlockToken = configPanda['chirpconfig'] logger.debug('chirp file %s',file) jspec.jobParameters = param return jspec
and PIPELINE_STREAM is not None: jobName = 'job.%(PIPELINE_PROCESSINSTANCE)s.%(PIPELINE_TASK)s.%(PIPELINE_EXECUTIONNUMBER)s.%(prodUserName)s.%(PIPELINE_STREAM)s' % \ {'prodUserName': str(prodUserName), \ 'PIPELINE_TASK': str(PIPELINE_TASK), \ 'PIPELINE_EXECUTIONNUMBER': str(PIPELINE_EXECUTIONNUMBER), \ 'PIPELINE_STREAM': str(PIPELINE_STREAM), \ 'PIPELINE_PROCESSINSTANCE': str(PIPELINE_PROCESSINSTANCE) \ } else: jobName = "%s" % commands.getoutput('uuidgen') if PIPELINE_STREAM is not None: jobDefinitionID = PIPELINE_STREAM else: jobDefinitionID = int(time.time()) % 10000 job = JobSpec() job.jobDefinitionID = jobDefinitionID job.jobName = jobName job.transformation = 'http://pandawms.org/pandawms-jobcache/lsst-trf.sh' job.destinationDBlock = datasetName job.destinationSE = 'local' job.currentPriority = 1000 job.prodSourceLabel = 'panda' job.jobParameters = ' --lsstJobParams="%s" ' % lsstJobParams if prodUserName is not None: job.prodUserName = prodUserName else: job.prodUserName = prodUserNameDefault if PIPELINE_PROCESSINSTANCE is not None: job.taskID = PIPELINE_PROCESSINSTANCE if PIPELINE_EXECUTIONNUMBER is not None:
step = m.group(2) if format=='HITS': step = 'simul' # append oDatasets.append('%s.%s.%s.%s_tid%06d' % (m.group(1),step,format,m.group(3),int(taskID))) # log dataset lDataset = '%s.%s.%s.%s_tid%06d' % (m.group(1),m.group(2),'log',m.group(3),int(taskID)) # instantiate JobSpecs iJob = 0 jobList = [] for line in taskFile: iJob += 1 job = JobSpec() # job ID ###### FIXME job.jobDefinitionID = int(time.time()) % 10000 # job name job.jobName = "%s_%05d.job" % (taskName,iJob) # AtlasRelease if len(re.findall('\.',trfVer)) > 2: match = re.search('^(\d+\.\d+\.\d+)',trfVer) job.AtlasRelease = 'Atlas-%s' % match.group(1) else: job.AtlasRelease = 'Atlas-%s' % trfVer # homepackage vers = trfVer.split('.') if int(vers[0]) <= 11: job.homepackage = 'JobTransforms' for ver in vers:
def defineEvgen16Job(self, i): """Define an Evgen16 job based on predefined values and randomly generated names """ job = JobSpec() job.computingSite = self.__site job.cloud = self.__cloud job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (uuid.uuid1(), i) job.AtlasRelease = 'Atlas-16.6.2' job.homepackage = 'AtlasProduction/16.6.2.1' job.transformation = 'Evgen_trf.py' job.destinationDBlock = self.__datasetName job.destinationSE = self.__destName job.currentPriority = 10000 job.prodSourceLabel = 'test' job.cmtConfig = 'i686-slc5-gcc43-opt' #Output file fileO = FileSpec() fileO.lfn = "%s.evgen.pool.root" % job.jobName fileO.destinationDBlock = job.destinationDBlock fileO.destinationSE = job.destinationSE fileO.dataset = job.destinationDBlock fileO.destinationDBlockToken = 'ATLASDATADISK' fileO.type = 'output' job.addFile(fileO) #Log file fileL = FileSpec() fileL.lfn = "%s.job.log.tgz" % job.jobName fileL.destinationDBlock = job.destinationDBlock fileL.destinationSE = job.destinationSE fileL.dataset = job.destinationDBlock fileL.destinationDBlockToken = 'ATLASDATADISK' fileL.type = 'log' job.addFile(fileL) job.jobParameters = "2760 105048 19901 101 200 MC10.105048.PythiaB_ccmu3mu1X.py %s NONE NONE NONE MC10JobOpts-latest-test.tar.gz" % fileO.lfn return job
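A hedged usage sketch for the helper above; the driver method below is hypothetical and assumes it lives in the same test class, with userinterface.Client imported as Client as in the other submission examples here:
# hypothetical driver: build a few Evgen16 jobs and submit them in one call
def submitEvgen16Jobs(self, nJobs=3):
    jobList = [self.defineEvgen16Job(i) for i in range(nJobs)]
    s, o = Client.submitJobs(jobList)
    print s
    print o
    return s, o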
from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec from taskbuffer.DatasetSpec import DatasetSpec from taskbuffer.DBProxyPool import DBProxyPool import getpass passwd = getpass.getpass() pool = DBProxyPool('adbpro.usatlas.bnl.gov', passwd, 2) proxy = pool.getProxy() import sys import commands job1 = JobSpec() job1.PandaID = 'NULL' job1.jobStatus = 'unknown' job1.computingSite = "aaa" f11 = FileSpec() f11.lfn = 'in1.pool.root' f11.type = 'input' job1.addFile(f11) f12 = FileSpec() f12.lfn = 'out1.pool.root' f12.type = 'output' job1.addFile(f12) job2 = JobSpec() job2.PandaID = 'NULL' job2.jobStatus = 'unknown'
from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec if len(sys.argv) > 1: site = sys.argv[1] else: site = None datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') destName = 'BNL_SE' jobListE = [] lfnListE = [] for i in range(2): job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = commands.getoutput('uuidgen') job.AtlasRelease = 'Atlas-11.0.3' job.homepackage = 'JobTransforms-11-00-03-03' job.transformation = 'share/csc.evgen.trf' job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = 1000 job.prodSourceLabel = 'test' job.computingSite = site file = FileSpec() file.lfn = "%s.evgen.pool.root" % commands.getoutput('uuidgen') lfnListE.append(file.lfn) file.lfn += ('.%d' % (i + 1))
def send_job(jobid, siteid): _logger.debug('Jobid: ' + str(jobid)) site = sites_.get(siteid) job = jobs_.get(int(jobid)) cont = job.container files_catalog = cont.files fscope = getScope(job.owner.username) datasetName = '{}:{}'.format(fscope, cont.guid) distributive = job.distr.name release = job.distr.release # Prepare runScript parameters = job.distr.command parameters = parameters.replace("$COMMAND$", job.params) parameters = parameters.replace("$USERNAME$", job.owner.username) parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group) # Prepare metadata metadata = dict(user=job.owner.username) # Prepare PanDA Object pandajob = JobSpec() pandajob.jobDefinitionID = int(time.time()) % 10000 pandajob.jobName = cont.guid pandajob.transformation = client_config.DEFAULT_TRF pandajob.destinationDBlock = datasetName pandajob.destinationSE = site.se pandajob.currentPriority = 1000 pandajob.prodSourceLabel = 'user' pandajob.computingSite = site.ce pandajob.cloud = 'RU' pandajob.VO = 'atlas' pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName) pandajob.coreCount = job.corecount pandajob.metadata = json.dumps(metadata) #pandajob.workingGroup = job.owner.working_group if site.encode_commands: # It requires script wrapper on cluster side pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release, distributive, parameters) else: pandajob.jobParameters = parameters has_input = False for fcc in files_catalog: if fcc.type == 'input': f = fcc.file guid = f.guid fileIT = FileSpec() fileIT.lfn = f.lfn fileIT.dataset = pandajob.prodDBlock fileIT.prodDBlock = pandajob.prodDBlock fileIT.type = 'input' fileIT.scope = fscope fileIT.status = 'ready' fileIT.GUID = guid pandajob.addFile(fileIT) has_input = True if fcc.type == 'output': f = fcc.file fileOT = FileSpec() fileOT.lfn = f.lfn fileOT.destinationDBlock = pandajob.prodDBlock fileOT.destinationSE = pandajob.destinationSE fileOT.dataset = pandajob.prodDBlock fileOT.type = 'output' fileOT.scope = fscope fileOT.GUID = f.guid pandajob.addFile(fileOT) # Save replica meta fc.new_replica(f, site) if not has_input: # Add fake input fileIT = FileSpec() fileIT.lfn = "fake.input" fileIT.dataset = pandajob.prodDBlock fileIT.prodDBlock = pandajob.prodDBlock fileIT.type = 'input' fileIT.scope = fscope fileIT.status = 'ready' fileIT.GUID = "fake.guid" pandajob.addFile(fileIT) # Prepare lof file fileOL = FileSpec() fileOL.lfn = "%s.log.tgz" % pandajob.jobName fileOL.destinationDBlock = pandajob.destinationDBlock fileOL.destinationSE = pandajob.destinationSE fileOL.dataset = '{}:logs'.format(fscope) fileOL.type = 'log' fileOL.scope = 'panda' pandajob.addFile(fileOL) # Save log meta log = File() log.scope = fscope log.lfn = fileOL.lfn log.guid = getGUID(log.scope, log.lfn) log.type = 'log' log.status = 'defined' files_.save(log) # Save replica meta fc.new_replica(log, site) # Register file in container fc.reg_file_in_cont(log, cont, 'log') # Submit job o = submitJobs([pandajob]) x = o[0] try: #update PandaID PandaID = int(x[0]) job.pandaid = PandaID job.ce = site.ce except: job.status = 'submit_error' jobs_.save(job) return 0
def run(self): try: while True: _logger.debug('%s start' % self.pandaID) # query job job = self.taskBuffer.peekJobs([self.pandaID], fromDefined=False, fromArchived=False, fromWaiting=False)[0] _logger.debug('%s in %s' % (self.pandaID, job.jobStatus)) # check job status if job == None: _logger.debug('%s escape : not found' % self.pandaID) return if not job.jobStatus in [ 'running', 'sent', 'starting', 'holding', 'stagein', 'stageout' ]: if job.jobStatus == 'transferring' and ( job.prodSourceLabel in ['user', 'panda'] or job.jobSubStatus not in [None, 'NULL', '']): pass else: _logger.debug('%s escape : %s' % (self.pandaID, job.jobStatus)) return # time limit timeLimit = datetime.datetime.utcnow() - datetime.timedelta( minutes=self.sleepTime) if job.modificationTime < timeLimit or ( job.endTime != 'NULL' and job.endTime < timeLimit): _logger.debug( '%s %s lastmod:%s endtime:%s' % (job.PandaID, job.jobStatus, str( job.modificationTime), str(job.endTime))) destDBList = [] if job.jobStatus == 'sent': # sent job didn't receive reply from pilot within 30 min job.jobDispatcherErrorCode = ErrorCode.EC_SendError job.jobDispatcherErrorDiag = "Sent job didn't receive reply from pilot within 30 min" elif job.exeErrorDiag == 'NULL' and job.pilotErrorDiag == 'NULL': # lost heartbeat job.jobDispatcherErrorCode = ErrorCode.EC_Watcher if job.jobDispatcherErrorDiag == 'NULL': if job.endTime == 'NULL': # normal lost heartbeat job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str( job.modificationTime) else: # job recovery failed job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str( job.endTime) if job.jobStatus == 'transferring': job.jobDispatcherErrorDiag += ' in transferring' # get worker workerSpecs = self.taskBuffer.getWorkersForJob( job.PandaID) if len(workerSpecs) > 0: workerSpec = workerSpecs[0] if workerSpec.status in [ 'finished', 'failed', 'cancelled', 'missed' ]: job.supErrorCode = SupErrors.error_codes[ 'WORKER_ALREADY_DONE'] job.supErrorDiag = 'worker already {0} at {1} with {2}'.format( workerSpec.status, str(workerSpec.endTime), workerSpec.diagMessage) job.supErrorDiag = JobSpec.truncateStringAttr( 'supErrorDiag', job.supErrorDiag) else: # job recovery failed job.jobDispatcherErrorCode = ErrorCode.EC_Recovery job.jobDispatcherErrorDiag = 'job recovery failed for %s hours' % ( self.sleepTime / 60) # set job status job.jobStatus = 'failed' # set endTime for lost heartbeat if job.endTime == 'NULL': # normal lost heartbeat job.endTime = job.modificationTime # set files status for file in job.Files: if file.type == 'output' or file.type == 'log': file.status = 'failed' if not file.destinationDBlock in destDBList: destDBList.append(file.destinationDBlock) # event service if EventServiceUtils.isEventServiceJob( job ) and not EventServiceUtils.isJobCloningJob(job): eventStat = self.taskBuffer.getEventStat( job.jediTaskID, job.PandaID) # set sub status when no sucessful events if EventServiceUtils.ST_finished not in eventStat: job.jobSubStatus = 'es_heartbeat' # update job self.taskBuffer.updateJobs([job], False) # start closer if job.jobStatus == 'failed': source = 'jobDispatcherErrorCode' error_code = job.jobDispatcherErrorCode error_diag = job.jobDispatcherErrorDiag try: _logger.debug( "Watcher will call apply_retrial_rules") retryModule.apply_retrial_rules( self.taskBuffer, job.PandaID, source, error_code, error_diag, job.attemptNr) _logger.debug("apply_retrial_rules is back") except Exception as e: _logger.debug( "apply_retrial_rules excepted and needs to be investigated (%s): %s" % (e, 
traceback.format_exc())) # updateJobs was successful and it failed a job with taskBufferErrorCode try: _logger.debug("Watcher.run will peek the job") job_tmp = self.taskBuffer.peekJobs( [job.PandaID], fromDefined=False, fromArchived=True, fromWaiting=False)[0] if job_tmp.taskBufferErrorCode: source = 'taskBufferErrorCode' error_code = job_tmp.taskBufferErrorCode error_diag = job_tmp.taskBufferErrorDiag _logger.debug( "Watcher.run 2 will call apply_retrial_rules" ) retryModule.apply_retrial_rules( self.taskBuffer, job_tmp.PandaID, source, error_code, error_diag, job_tmp.attemptNr) _logger.debug("apply_retrial_rules 2 is back") except IndexError: pass except Exception as e: self.logger.error( "apply_retrial_rules 2 excepted and needs to be investigated (%s): %s" % (e, traceback.format_exc())) cThr = Closer(self.taskBuffer, destDBList, job) cThr.start() cThr.join() _logger.debug('%s end' % job.PandaID) return # single action if self.single: return # sleep time.sleep(60 * self.sleepTime) except: type, value, traceBack = sys.exc_info() _logger.error("run() : %s %s" % (type, value)) return
def run(self): try: # get job tmpJobs = self.taskBuffer.getFullJobStatus([self.rPandaID]) if tmpJobs == [] or tmpJobs[0] == None: _logger.debug("cannot find job for PandaID=%s" % self.rPandaID) return self.job = tmpJobs[0] _logger.debug("%s start %s:%s:%s" % (self.token,self.job.jobDefinitionID,self.job.prodUserName,self.job.computingSite)) # using output container if not self.job.destinationDBlock.endswith('/'): _logger.debug("%s output dataset container is required" % self.token) _logger.debug("%s end" % self.token) return # FIXME : don't touch group jobs for now if self.job.destinationDBlock.startswith('group') and (not self.userRequest): _logger.debug("%s skip group jobs" % self.token) _logger.debug("%s end" % self.token) return # check processingType typesForRebro = ['pathena','prun','ganga','ganga-rbtest'] if not self.job.processingType in typesForRebro: _logger.debug("%s skip processingType=%s not in %s" % \ (self.token,self.job.processingType,str(typesForRebro))) _logger.debug("%s end" % self.token) return # check jobsetID if self.job.jobsetID in [0,'NULL',None]: _logger.debug("%s jobsetID is undefined" % self.token) _logger.debug("%s end" % self.token) return # check metadata if self.job.metadata in [None,'NULL']: _logger.debug("%s metadata is unavailable" % self.token) _logger.debug("%s end" % self.token) return # check --disableRebrokerage match = re.search("--disableRebrokerage",self.job.metadata) if match != None and (not self.simulation) and (not self.forceOpt) \ and (not self.userRequest): _logger.debug("%s disabled rebrokerage" % self.token) _logger.debug("%s end" % self.token) return # check --site match = re.search("--site",self.job.metadata) if match != None and (not self.simulation) and (not self.forceOpt) \ and (not self.userRequest): _logger.debug("%s --site is used" % self.token) _logger.debug("%s end" % self.token) return # check --libDS match = re.search("--libDS",self.job.metadata) if match != None: _logger.debug("%s --libDS is used" % self.token) _logger.debug("%s end" % self.token) return # check --workingGroup since it is site-specific match = re.search("--workingGroup",self.job.metadata) if match != None: _logger.debug("%s workingGroup is specified" % self.token) _logger.debug("%s end" % self.token) return # avoid too many rebrokerage if not self.checkRev(): _logger.debug("%s avoid too many rebrokerage" % self.token) _logger.debug("%s end" % self.token) return # check if multiple JobIDs use the same libDS if self.bPandaID != None and self.buildStatus not in ['finished','failed']: if self.minPandaIDlibDS == None or self.maxPandaIDlibDS == None: _logger.debug("%s max/min PandaIDs are unavailable for the libDS" % self.token) _logger.debug("%s end" % self.token) return tmpPandaIDsForLibDS = self.taskBuffer.getFullJobStatus([self.minPandaIDlibDS,self.maxPandaIDlibDS]) if len(tmpPandaIDsForLibDS) != 2 or tmpPandaIDsForLibDS[0] == None or tmpPandaIDsForLibDS[1] == None: _logger.debug("%s failed to get max/min PandaIDs for the libDS" % self.token) _logger.debug("%s end" % self.token) return # check if tmpPandaIDsForLibDS[0].jobDefinitionID != tmpPandaIDsForLibDS[1].jobDefinitionID: _logger.debug("%s multiple JobIDs use the libDS %s:%s %s:%s" % (self.token,tmpPandaIDsForLibDS[0].jobDefinitionID, self.minPandaIDlibDS,tmpPandaIDsForLibDS[1].jobDefinitionID, self.maxPandaIDlibDS)) _logger.debug("%s end" % self.token) return # check excludedSite if self.excludedSite == None: self.excludedSite = [] match = re.search("--excludedSite( +|=)\s*(\'|\")*([^ 
\"\';$]+)",self.job.metadata) if match != None: self.excludedSite = match.group(3).split(',') # remove empty try: self.excludedSite.remove('') except: pass _logger.debug("%s excludedSite=%s" % (self.token,str(self.excludedSite))) # check cloud if self.cloud == None: match = re.search("--cloud( +|=)\s*(\'|\")*([^ \"\';$]+)",self.job.metadata) if match != None: self.cloud = match.group(3) _logger.debug("%s cloud=%s" % (self.token,self.cloud)) # get inDS/LFNs status,tmpMapInDS,maxFileSize = self.taskBuffer.getInDatasetsForReBrokerage(self.jobID,self.userName) if not status: # failed _logger.error("%s failed to get inDS/LFN from DB" % self.token) return status,inputDS = self.getListDatasetsUsedByJob(tmpMapInDS) if not status: # failed _logger.error("%s failed" % self.token) return # get relicas replicaMap = {} unknownSites = {} for tmpDS in inputDS: if tmpDS.endswith('/'): # container status,tmpRepMaps = self.getListDatasetReplicasInContainer(tmpDS) else: # normal dataset status,tmpRepMap = self.getListDatasetReplicas(tmpDS) tmpRepMaps = {tmpDS:tmpRepMap} if not status: # failed _logger.debug("%s failed" % self.token) return # make map per site for tmpDS,tmpRepMap in tmpRepMaps.iteritems(): for tmpSite,tmpStat in tmpRepMap.iteritems(): # ignore special sites if tmpSite in ['CERN-PROD_TZERO','CERN-PROD_DAQ','CERN-PROD_TMPDISK']: continue # ignore tape sites if tmpSite.endswith('TAPE'): continue # keep sites with unknown replica info if tmpStat[-1]['found'] == None: if not unknownSites.has_key(tmpDS): unknownSites[tmpDS] = [] unknownSites[tmpDS].append(tmpSite) # ignore ToBeDeleted if tmpStat[-1]['archived'] in ['ToBeDeleted',]: continue # change EOS if tmpSite.startswith('CERN-PROD_EOS'): tmpSite = 'CERN-PROD_EOS' # change EOS TMP if tmpSite.startswith('CERN-PROD_TMP'): tmpSite = 'CERN-PROD_TMP' # change DISK to SCRATCHDISK tmpSite = re.sub('_[^_-]+DISK$','',tmpSite) # change PERF-XYZ to SCRATCHDISK tmpSite = re.sub('_PERF-[^_-]+$','',tmpSite) # change PHYS-XYZ to SCRATCHDISK tmpSite = re.sub('_PHYS-[^_-]+$','',tmpSite) # patch for BNLPANDA if tmpSite in ['BNLPANDA']: tmpSite = 'BNL-OSG2' # add to map if not replicaMap.has_key(tmpSite): replicaMap[tmpSite] = {} replicaMap[tmpSite][tmpDS] = tmpStat[-1] _logger.debug("%s replica map -> %s" % (self.token,str(replicaMap))) # refresh replica info in needed self.refreshReplicaInfo(unknownSites) # instantiate SiteMapper siteMapper = SiteMapper(self.taskBuffer) # get original DDM origSiteDDM = self.getAggName(siteMapper.getSite(self.job.computingSite).ddm) # check all datasets maxDQ2Sites = [] if inputDS != []: # loop over all sites for tmpSite,tmpDsVal in replicaMap.iteritems(): # loop over all datasets appendFlag = True for tmpOrigDS in inputDS: # check completeness if tmpDsVal.has_key(tmpOrigDS) and tmpDsVal[tmpOrigDS]['found'] != None and \ tmpDsVal[tmpOrigDS]['total'] == tmpDsVal[tmpOrigDS]['found']: pass else: appendFlag = False # append if appendFlag: if not tmpSite in maxDQ2Sites: maxDQ2Sites.append(tmpSite) _logger.debug("%s candidate DQ2s -> %s" % (self.token,str(maxDQ2Sites))) if inputDS != [] and maxDQ2Sites == []: _logger.debug("%s no DQ2 candidate" % self.token) else: maxPandaSites = [] # original maxinputsize origMaxInputSize = siteMapper.getSite(self.job.computingSite).maxinputsize # look for Panda siteIDs for tmpSiteID,tmpSiteSpec in siteMapper.siteSpecList.iteritems(): # use ANALY_ only if not tmpSiteID.startswith('ANALY_'): continue # remove test and local if re.search('_test',tmpSiteID,re.I) != None: continue if 
re.search('_local',tmpSiteID,re.I) != None: continue # avoid same site if self.avoidSameSite and self.getAggName(tmpSiteSpec.ddm) == origSiteDDM: continue # check DQ2 ID if self.cloud in [None,tmpSiteSpec.cloud] \ and (self.getAggName(tmpSiteSpec.ddm) in maxDQ2Sites or inputDS == []): # excluded sites excludedFlag = False for tmpExcSite in self.excludedSite: if re.search(tmpExcSite,tmpSiteID) != None: excludedFlag = True break if excludedFlag: _logger.debug("%s skip %s since excluded" % (self.token,tmpSiteID)) continue # use online only if tmpSiteSpec.status != 'online': _logger.debug("%s skip %s status=%s" % (self.token,tmpSiteID,tmpSiteSpec.status)) continue # check maxinputsize if (maxFileSize == None and origMaxInputSize > siteMapper.getSite(tmpSiteID).maxinputsize) or \ maxFileSize > siteMapper.getSite(tmpSiteID).maxinputsize: _logger.debug("%s skip %s due to maxinputsize" % (self.token,tmpSiteID)) continue # append if not tmpSiteID in maxPandaSites: maxPandaSites.append(tmpSiteID) # choose at most 20 sites randomly to avoid too many lookup random.shuffle(maxPandaSites) maxPandaSites = maxPandaSites[:20] _logger.debug("%s candidate PandaSites -> %s" % (self.token,str(maxPandaSites))) # no Panda siteIDs if maxPandaSites == []: _logger.debug("%s no Panda site candidate" % self.token) else: # set AtlasRelease and cmtConfig to dummy job tmpJobForBrokerage = JobSpec() if self.job.AtlasRelease in ['NULL',None]: tmpJobForBrokerage.AtlasRelease = '' else: tmpJobForBrokerage.AtlasRelease = self.job.AtlasRelease # use nightlies matchNight = re.search('^AnalysisTransforms-.*_(rel_\d+)$',self.job.homepackage) if matchNight != None: tmpJobForBrokerage.AtlasRelease += ':%s' % matchNight.group(1) # use cache else: matchCache = re.search('^AnalysisTransforms-([^/]+)',self.job.homepackage) if matchCache != None: tmpJobForBrokerage.AtlasRelease = matchCache.group(1).replace('_','-') if not self.job.cmtConfig in ['NULL',None]: tmpJobForBrokerage.cmtConfig = self.job.cmtConfig # memory size if not self.job.minRamCount in ['NULL',None,0]: tmpJobForBrokerage.minRamCount = self.job.minRamCount # CPU count if not self.job.maxCpuCount in ['NULL',None,0]: tmpJobForBrokerage.maxCpuCount = self.job.maxCpuCount # run brokerage brokerage.broker.schedule([tmpJobForBrokerage],self.taskBuffer,siteMapper,forAnalysis=True, setScanSiteList=maxPandaSites,trustIS=True,reportLog=True) newSiteID = tmpJobForBrokerage.computingSite self.brokerageInfo += tmpJobForBrokerage.brokerageErrorDiag _logger.debug("%s runBrokerage - > %s" % (self.token,newSiteID)) # unknown site if not siteMapper.checkSite(newSiteID): _logger.error("%s unknown site" % self.token) _logger.debug("%s failed" % self.token) return # get new site spec newSiteSpec = siteMapper.getSite(newSiteID) # avoid repetition if self.getAggName(newSiteSpec.ddm) == origSiteDDM: _logger.debug("%s assigned to the same site %s " % (self.token,newSiteID)) _logger.debug("%s end" % self.token) return # simulation mode if self.simulation: _logger.debug("%s end simulation" % self.token) return # prepare jobs status = self.prepareJob(newSiteID,newSiteSpec) if status: # run SetUpper statusSetUp = self.runSetUpper() if not statusSetUp: _logger.debug("%s runSetUpper failed" % self.token) else: _logger.debug("%s successfully assigned to %s" % (self.token,newSiteID)) _logger.debug("%s end" % self.token) except: errType,errValue,errTraceBack = sys.exc_info() _logger.error("%s run() : %s %s" % (self.token,errType,errValue))
import userinterface.Client as Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec if len(sys.argv)>1: site = sys.argv[1] else: site = None datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') #destName = 'BNL_SE' jobList = [] for i in [999905,999906,999907]: job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'),i) job.AtlasRelease = 'Atlas-14.1.0' job.homepackage = 'AtlasProduction/12.0.6.2' job.transformation = 'csc_evgen_trf.py' job.destinationDBlock = datasetName #job.destinationSE = destName job.currentPriority = 1000 job.prodSourceLabel = 'managed' #job.prodSourceLabel = 'test' #job.computingSite = site job.cmtConfig = 'i686-slc4-gcc34-opt' job.metadata = 'evgen;%s;%s;%s' % (str({'FR': 46, 'NL': 45, 'NDGF': 300, 'CERN': 19, 'TW': 44110, 'CA': 2922, 'DE': 9903, 'IT': 1168, 'US': 6226, 'UK': 1026, 'ES': 26619}),str({999907:100,999906:200,999905:300}),str({999905:100,999906:910,999907:500})) #job.metadata = 'evgen;%s' % str({'FR': 46, 'NL': 45, 'NDGF': 300, 'CERN': 19, 'TW': 44110, 'CA': 2922, 'DE': 9903, 'IT': 1168, 'US': 6226, 'UK': 1026, 'ES': 26619})
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig): '''prepare the subjob specific configuration''' from pandatools import Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.')) # in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter if job.inputdata and not job._getRoot().subjobs: if not job.inputdata.names: contents = job.inputdata.get_contents(overlap=False, size=True) for ds in contents.keys(): for f in contents[ds]: job.inputdata.guids.append(f[0]) job.inputdata.names.append(f[1][0]) job.inputdata.sizes.append(f[1][1]) job.inputdata.checksums.append(f[1][2]) job.inputdata.scopes.append(f[1][3]) site = job._getRoot().backend.site job.backend.site = site job.backend.actualCE = site cloud = job._getRoot().backend.requirements.cloud job.backend.requirements.cloud = cloud # if no outputdata are given if not job.outputdata: job.outputdata = DQ2OutputDataset() job.outputdata.datasetname = job._getRoot().outputdata.datasetname #if not job.outputdata.datasetname: else: job.outputdata.datasetname = job._getRoot().outputdata.datasetname if not job.outputdata.datasetname: raise ApplicationConfigurationError( None, 'DQ2OutputDataset has no datasetname') jspec = JobSpec() jspec.jobDefinitionID = job._getRoot().id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.transformation = '%s/runGen-00-00-02' % Client.baseURLSUB if job.inputdata: jspec.prodDBlock = job.inputdata.dataset[0] else: jspec.prodDBlock = 'NULL' jspec.destinationDBlock = job.outputdata.datasetname if job.outputdata.location: if not job._getRoot().subjobs or job.id == 0: logger.warning( 'You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.' ) jspec.destinationSE = job.outputdata.location else: jspec.destinationSE = site jspec.prodSourceLabel = configPanda['prodSourceLabelRun'] jspec.processingType = configPanda['processingType'] jspec.assignedPriority = configPanda['assignedPriorityRun'] jspec.cloud = cloud # memory if job.backend.requirements.memory != -1: jspec.minRamCount = job.backend.requirements.memory # cputime if job.backend.requirements.cputime != -1: jspec.maxCpuCount = job.backend.requirements.cputime jspec.computingSite = site # library (source files) if job.backend.libds: flib = FileSpec() flib.lfn = self.fileBO.lfn flib.GUID = self.fileBO.GUID flib.type = 'input' flib.status = self.fileBO.status flib.dataset = self.fileBO.destinationDBlock flib.dispatchDBlock = self.fileBO.destinationDBlock jspec.addFile(flib) elif job.backend.bexec: flib = FileSpec() flib.lfn = self.library flib.type = 'input' flib.dataset = self.libDataset flib.dispatchDBlock = self.libDataset jspec.addFile(flib) # input files FIXME: many more input types if job.inputdata: for guid, lfn, size, checksum, scope in zip( job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes): finp = FileSpec() finp.lfn = lfn finp.GUID = guid finp.scope = scope # finp.fsize = # finp.md5sum = finp.dataset = job.inputdata.dataset[0] finp.prodDBlock = job.inputdata.dataset[0] finp.dispatchDBlock = job.inputdata.dataset[0] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # output files # outMap = {} #FIXME: if options.outMeta != []: self.rundirectory = "." 
# log files flog = FileSpec() flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname flog.type = 'log' flog.dataset = job.outputdata.datasetname flog.destinationDBlock = job.outputdata.datasetname flog.destinationSE = job.backend.site jspec.addFile(flog) # job parameters param = '' # source URL matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL) srcURL = "" if matchURL != None: srcURL = matchURL.group(1) param += " --sourceURL %s " % srcURL param += '-r "%s" ' % self.rundirectory exe_name = job.application.exe if job.backend.bexec == '': if hasattr(job.application.exe, "name"): exe_name = os.path.basename(job.application.exe.name) # set jobO parameter if job.application.args: param += ' -j "" -p "%s %s" ' % ( exe_name, urllib.quote(" ".join(job.application.args))) else: param += ' -j "" -p "%s" ' % exe_name if self.inputsandbox: param += ' -a %s ' % self.inputsandbox else: param += '-l %s ' % self.library param += '-j "" -p "%s %s" ' % ( exe_name, urllib.quote(" ".join(job.application.args))) if job.inputdata: param += '-i "%s" ' % job.inputdata.names # fill outfiles outfiles = {} for f in self.extOutFile: tarnum = 1 if f.find('*') != -1: # archive * outfiles[f] = "outputbox%i.%s.%s.tar.gz" % ( tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S")) tarnum += 1 else: outfiles[f] = "%s.%s.%s" % (f, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S")) fout = FileSpec() fout.lfn = outfiles[f] fout.type = 'output' fout.dataset = job.outputdata.datasetname fout.destinationDBlock = job.outputdata.datasetname fout.destinationSE = job.backend.site jspec.addFile(fout) param += '-o "%s" ' % ( outfiles ) # must be double quotes, because python prints strings in 'single quotes' for file in jspec.Files: if file.type in ['output', 'log'] and configPanda['chirpconfig']: file.dispatchDBlockToken = configPanda['chirpconfig'] logger.debug('chirp file %s', file) jspec.jobParameters = param return jspec
if argv == '-s': aSrvID = sys.argv[idx + 1] sys.argv = sys.argv[:idx] break #site = sys.argv[1] site = 'ANALY_BNL-LSST' #orig #site = 'BNL-LSST' #site = 'SWT2_CPB-LSST' #site = 'UTA_SWT2-LSST' #site = 'ANALY_SWT2_CPB-LSST' datasetName = 'panda.user.jschovan.lsst.%s' % commands.getoutput('uuidgen') destName = None job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s" % commands.getoutput('uuidgen') ### job.transformation = 'http://www.usatlas.bnl.gov/~wenaus/lsst-trf/lsst-trf.sh' #job.transformation = 'http://pandawms.org/pandawms-jobcache/lsst-trf.sh' job.transformation = 'http://pandawms.org/pandawms-jobcache/lsst-trf-phosim332.sh' job.destinationDBlock = datasetName #job.destinationSE = destName job.destinationSE = 'local' job.currentPriority = 1000 #job.prodSourceLabel = 'ptest' #job.prodSourceLabel = 'panda' #job.prodSourceLabel = 'ptest' #job.prodSourceLabel = 'test' #job.prodSourceLabel = 'ptest' ### 2014-01-27
def run(self, data): datasetName = 'panda:panda.destDB.%s' % commands.getoutput('uuidgen') destName = 'ANALY_RRC-KI-HPC' site = 'ANALY_RRC-KI-HPC' scope = config['DEFAULT_SCOPE'] distributive = data['distributive'] release = data['release'] parameters = data['parameters'] input_type = data['input_type'] input_params = data['input_params'] input_files = data['input_files'] output_type = data['output_type'] output_params = data['output_params'] output_files = data['output_files'] jobid = data['jobid'] _logger.debug('Jobid: ' + str(jobid)) job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = commands.getoutput('uuidgen') job.transformation = config['DEFAULT_TRF'] job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = 1000 job.prodSourceLabel = 'user' job.computingSite = site job.cloud = 'RU' job.prodDBlock = "%s:%s.%s" % (scope, scope, job.jobName) job.jobParameters = '%s %s "%s"' % (release, distributive, parameters) params = {} _logger.debug('MoveData') ec = 0 ec, uploaded_input_files = movedata( params=params, fileList=input_files, fromType=input_type, fromParams=input_params, toType='hpc', toParams={'dest': '/' + re.sub(':', '/', job.prodDBlock)}) if ec != 0: _logger.error('Move data error: ' + ec[1]) return for file in uploaded_input_files: fileIT = FileSpec() fileIT.lfn = file fileIT.dataset = job.prodDBlock fileIT.prodDBlock = job.prodDBlock fileIT.type = 'input' fileIT.scope = scope fileIT.status = 'ready' fileIT.GUID = commands.getoutput('uuidgen') job.addFile(fileIT) for file in output_files: fileOT = FileSpec() fileOT.lfn = file fileOT.destinationDBlock = job.prodDBlock fileOT.destinationSE = job.destinationSE fileOT.dataset = job.prodDBlock fileOT.type = 'output' fileOT.scope = scope fileOT.GUID = commands.getoutput('uuidgen') job.addFile(fileOT) fileOL = FileSpec() fileOL.lfn = "%s.log.tgz" % job.jobName fileOL.destinationDBlock = job.destinationDBlock fileOL.destinationSE = job.destinationSE fileOL.dataset = job.destinationDBlock fileOL.type = 'log' fileOL.scope = 'panda' job.addFile(fileOL) self.jobList.append(job) #submitJob o = self.submitJobs(self.jobList) x = o[0] #update PandaID conn = MySQLdb.connect( host=self.dbhost, db=self.dbname, # port=self.dbport, connect_timeout=self.dbtimeout, user=self.dbuser, passwd=self.dbpasswd) cur = conn.cursor() try: varDict = {} PandaID = int(x[0]) varDict['id'] = jobid varDict['pandaId'] = PandaID sql = "UPDATE %s SET %s.pandaId=%s WHERE %s.id=%s" % ( self.table_jobs, self.table_jobs, varDict['pandaId'], self.table_jobs, varDict['id']) cur.execute(sql, varDict) except: _logger.error('SENDJOB: Incorrect server response') try: conn.commit() return True except: _logger.error("commit error") return False
destName = None files = { 'misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610._11615.pool.root.1':None, #'misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610._11639.pool.root.1':None, #'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554._03634.pool.root.1':None, #'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554._03248.pool.root.1':None, #'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554._03634.pool.root.1':None, } jobList = [] index = 0 for lfn in files.keys(): index += 1 job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'),index) job.AtlasRelease = 'Atlas-12.0.6' job.homepackage = 'AtlasProduction/12.0.6.4' job.transformation = 'csc_reco_trf.py' job.destinationDBlock = datasetName job.destinationSE = destName job.computingSite = site #job.prodDBlock = 'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554' job.prodDBlock = 'misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610' job.cloud = 'US' job.prodSourceLabel = 'test' job.currentPriority = 10000 job.cmtConfig = 'i686-slc4-gcc34-opt'
def createJobSpec(nodes, walltime, command, jobName, outputFile=None):
    transformation = '#json#'
    # strip the trailing newline that check_output leaves on the uuid
    datasetName = 'panda.destDB.%s' % subprocess.check_output('uuidgen').strip()
    destName = 'local'
    prodSourceLabel = 'user'
    currentPriority = 1000

    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = jobName
    job.VO = VO
    job.transformation = transformation
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = currentPriority
    job.prodSourceLabel = prodSourceLabel
    job.computingSite = QUEUE_NAME
    job.cmtConfig = json.dumps({'name': job.jobName, 'next': None})

    lqcd_command = {
        "nodes": nodes,
        "walltime": walltime,
        "name": job.jobName,
        "command": command
    }
    if outputFile:
        lqcd_command['outputFile'] = outputFile
    job.jobParameters = json.dumps(lqcd_command)

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName.strip()
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'
    job.addFile(fileOL)
    #job.cmtConfig = None
    return job
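A minimal usage sketch for createJobSpec follows. It assumes the module-level VO and QUEUE_NAME constants referenced inside the function are defined elsewhere; the argument values are placeholders.

# Hypothetical caller; all argument values are placeholders.
job = createJobSpec(nodes=1, walltime='00:30:00',
                    command='echo hello', jobName='lqcd-test-job',
                    outputFile='out.txt')

# The payload description round-trips through jobParameters as JSON.
payload = json.loads(job.jobParameters)
assert payload['nodes'] == 1
assert payload['outputFile'] == 'out.txt'

# Submission would then go through the PanDA client, e.g.
#   import userinterface.Client as Client
#   s, o = Client.submitJobs([job])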
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = "ANALY_BNL_ATLAS_1"

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'BNL_SE'
jobDefinitionID = int(time.time()) % 10000

jobList = []
for i in range(2):
    job = JobSpec()
    job.jobDefinitionID = jobDefinitionID
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    job.AtlasRelease = 'Atlas-12.0.6'
    job.homepackage = 'AnalysisTransforms'
    job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthenaXrd'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 3000
    job.assignedPriority = 3000
    job.prodSourceLabel = 'user'
    job.computingSite = site

    file = FileSpec()
    file.lfn = "%s.AANT._%05d.root" % (job.jobName, i)
    file.destinationDBlock = job.destinationDBlock
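    # --- Hedged continuation sketch; not the original script's exact tail. ---
    # The remaining per-job steps typically mirror the other snippets in this
    # collection: finish the output FileSpec, attach a log FileSpec, set
    # jobParameters (placeholder below), then collect and submit the jobs.
    file.destinationSE = job.destinationSE
    file.dataset = job.destinationDBlock
    file.type = 'output'
    job.addFile(file)

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'
    job.addFile(fileOL)

    job.jobParameters = ''  # real runAthena arguments would go here
    jobList.append(job)

# Submission through the standard PanDA client, as in the other test scripts.
import userinterface.Client as Client
s, o = Client.submitJobs(jobList)
print s
for x in o:
    print "PandaID=%s" % x[0]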
def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue): # variables for submission maxBunchTask = 100 # make logger tmpLog = MsgWrapper(logger) tmpLog.debug('start doBrokerage') # return for failure retFatal = self.SC_FATAL retTmpError = self.SC_FAILED tmpLog.debug('vo={0} label={1} queue={2}'.format( vo, prodSourceLabel, workQueue.queue_name)) # loop over all tasks allRwMap = {} prioMap = {} tt2Map = {} expRWs = {} jobSpecList = [] for tmpJediTaskID, tmpInputList in inputList: for taskSpec, cloudName, inputChunk in tmpInputList: # make JobSpec to be submitted for TaskAssigner jobSpec = JobSpec() jobSpec.taskID = taskSpec.jediTaskID jobSpec.jediTaskID = taskSpec.jediTaskID # set managed to trigger TA jobSpec.prodSourceLabel = 'managed' jobSpec.processingType = taskSpec.processingType jobSpec.workingGroup = taskSpec.workingGroup jobSpec.metadata = taskSpec.processingType jobSpec.assignedPriority = taskSpec.taskPriority jobSpec.currentPriority = taskSpec.currentPriority jobSpec.maxDiskCount = ( taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024 if taskSpec.useWorldCloud(): # use destinationSE to trigger task brokerage in WORLD cloud jobSpec.destinationSE = taskSpec.cloud prodDBlock = None setProdDBlock = False for datasetSpec in inputChunk.getDatasets(): prodDBlock = datasetSpec.datasetName if datasetSpec.isMaster(): jobSpec.prodDBlock = datasetSpec.datasetName setProdDBlock = True for fileSpec in datasetSpec.Files: tmpInFileSpec = fileSpec.convertToJobFileSpec( datasetSpec) jobSpec.addFile(tmpInFileSpec) # use secondary dataset name as prodDBlock if setProdDBlock == False and prodDBlock != None: jobSpec.prodDBlock = prodDBlock # append jobSpecList.append(jobSpec) prioMap[jobSpec.taskID] = jobSpec.currentPriority tt2Map[jobSpec.taskID] = jobSpec.processingType # get RW for a priority if not allRwMap.has_key(jobSpec.currentPriority): tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI( vo, prodSourceLabel, workQueue, jobSpec.currentPriority) if tmpRW == None: tmpLog.error( 'failed to calculate RW with prio={0}'.format( jobSpec.currentPriority)) return retTmpError allRwMap[jobSpec.currentPriority] = tmpRW # get expected RW expRW = self.taskBufferIF.calculateTaskRW_JEDI( jobSpec.jediTaskID) if expRW == None: tmpLog.error( 'failed to calculate RW for jediTaskID={0}'.format( jobSpec.jediTaskID)) return retTmpError expRWs[jobSpec.taskID] = expRW # get fullRWs fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI( vo, prodSourceLabel, None, None) if fullRWs == None: tmpLog.error('failed to calculate full RW') return retTmpError # set metadata for jobSpec in jobSpecList: rwValues = allRwMap[jobSpec.currentPriority] jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % ( jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap), str(fullRWs), str(tt2Map)) tmpLog.debug('run task assigner for {0} tasks'.format( len(jobSpecList))) nBunchTask = 0 while nBunchTask < len(jobSpecList): # get a bunch jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask] strIDs = 'jediTaskID=' for tmpJobSpec in jobsBunch: strIDs += '{0},'.format(tmpJobSpec.taskID) strIDs = strIDs[:-1] tmpLog.debug(strIDs) # increment index nBunchTask += maxBunchTask # run task brokerge stS, outSs = PandaClient.runTaskAssignment(jobsBunch) tmpLog.debug('{0}:{1}'.format(stS, str(outSs))) # return tmpLog.debug('done') return self.SC_SUCCEEDED
cloud = sys.argv[2]
prodDBlock = sys.argv[3]
inputFile = sys.argv[4]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile: None,
}

jobList = []
index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    # cast to int so jobDefinitionID is not a float
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-15.3.1'
    job.homepackage = 'AtlasProduction/15.3.1.5'
    job.transformation = 'csc_atlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite = site
    job.prodDBlock = prodDBlock
    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc4-gcc34-opt'
cloud = sys.argv[2]
prodDBlock = sys.argv[3]
inputFile = sys.argv[4]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile: None,
}

jobList = []
index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    # cast to int so jobDefinitionID is not a float
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-17.0.5'
    job.homepackage = 'AtlasProduction/17.0.5.6'
    job.transformation = 'AtlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite = site
    job.prodDBlock = prodDBlock
    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc5-gcc43-opt'
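    # --- Hedged continuation sketch; the AtlasG4_trf.py argument string is
    # --- intentionally omitted because it is sample- and site-specific.
    fileI = FileSpec()
    fileI.lfn = lfn
    fileI.dataset = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
    fileI.type = 'input'
    job.addFile(fileI)
    # ...log FileSpec and jobParameters as in the neighbouring snippets, then:
    jobList.append(job)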
def run(self): try: self.putLog('start %s' % self.evpFileName) # lock evp file self.evpFile = open(self.evpFileName) try: fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) except: # relase self.putLog("cannot lock %s" % self.evpFileName) self.evpFile.close() return True # options runEvtList = [] eventPickDataType = '' eventPickStreamName = '' eventPickDS = [] eventPickAmiTag = '' eventPickNumSites = 1 inputFileList = [] tagDsList = [] tagQuery = '' tagStreamRef = '' skipDaTRI = False runEvtGuidMap = {} # read evp file for tmpLine in self.evpFile: tmpMatch = re.search('^([^=]+)=(.+)$', tmpLine) # check format if tmpMatch == None: continue tmpItems = tmpMatch.groups() if tmpItems[0] == 'runEvent': # get run and event number tmpRunEvt = tmpItems[1].split(',') if len(tmpRunEvt) == 2: runEvtList.append(tmpRunEvt) elif tmpItems[0] == 'eventPickDataType': # data type eventPickDataType = tmpItems[1] elif tmpItems[0] == 'eventPickStreamName': # stream name eventPickStreamName = tmpItems[1] elif tmpItems[0] == 'eventPickDS': # dataset pattern eventPickDS = tmpItems[1].split(',') elif tmpItems[0] == 'eventPickAmiTag': # AMI tag eventPickAmiTag = tmpItems[1] elif tmpItems[0] == 'eventPickNumSites': # the number of sites where datasets are distributed try: eventPickNumSites = int(tmpItems[1]) except: pass elif tmpItems[0] == 'userName': # user name self.userDN = tmpItems[1] self.putLog("user=%s" % self.userDN) elif tmpItems[0] == 'userTaskName': # user task name self.userTaskName = tmpItems[1] elif tmpItems[0] == 'userDatasetName': # user dataset name self.userDatasetName = tmpItems[1] elif tmpItems[0] == 'lockedBy': # client name self.lockedBy = tmpItems[1] elif tmpItems[0] == 'creationTime': # creation time self.creationTime = tmpItems[1] elif tmpItems[0] == 'params': # parameters self.params = tmpItems[1] elif tmpItems[0] == 'inputFileList': # input file list inputFileList = tmpItems[1].split(',') try: inputFileList.remove('') except: pass elif tmpItems[0] == 'tagDS': # TAG dataset tagDsList = tmpItems[1].split(',') elif tmpItems[0] == 'tagQuery': # query for TAG tagQuery = tmpItems[1] elif tmpItems[0] == 'tagStreamRef': # StreamRef for TAG tagStreamRef = tmpItems[1] if not tagStreamRef.endswith('_ref'): tagStreamRef += '_ref' elif tmpItems[0] == 'runEvtGuidMap': # GUIDs try: exec "runEvtGuidMap=" + tmpItems[1] except: pass # extract task name if self.userTaskName == '' and self.params != '': try: tmpMatch = re.search('--outDS(=| ) *([^ ]+)', self.params) if tmpMatch != None: self.userTaskName = tmpMatch.group(2) if not self.userTaskName.endswith('/'): self.userTaskName += '/' except: pass # suppress DaTRI if self.params != '': if '--eventPickSkipDaTRI' in self.params: skipDaTRI = True # get compact user name compactDN = self.taskBuffer.cleanUserID(self.userDN) # get jediTaskID self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI( compactDN, self.userTaskName) # convert if tagDsList == [] or tagQuery == '': # convert run/event list to dataset/file list tmpRet, locationMap, allFiles = self.pd2p.convertEvtRunToDatasets( runEvtList, eventPickDataType, eventPickStreamName, eventPickDS, eventPickAmiTag, self.userDN, runEvtGuidMap) if not tmpRet: if 'isFatal' in locationMap and locationMap[ 'isFatal'] == True: self.ignoreError = False self.endWithError( 'Failed to convert the run/event list to a dataset/file list' ) return False else: # get parent dataset/files with TAG tmpRet, locationMap, allFiles = self.pd2p.getTagParentInfoUsingTagQuery( tagDsList, tagQuery, tagStreamRef) if not 
tmpRet: self.endWithError( 'Failed to get parent dataset/file list with TAG') return False # use only files in the list if inputFileList != []: tmpAllFiles = [] for tmpFile in allFiles: if tmpFile['lfn'] in inputFileList: tmpAllFiles.append(tmpFile) allFiles = tmpAllFiles # remove redundant CN from DN tmpDN = self.userDN tmpDN = re.sub('/CN=limited proxy', '', tmpDN) tmpDN = re.sub('(/CN=proxy)+$', '', tmpDN) # make dataset container tmpRet = self.pd2p.registerDatasetContainerWithDatasets( self.userDatasetName, allFiles, locationMap, nSites=eventPickNumSites, owner=tmpDN) if not tmpRet: self.endWithError('Failed to make a dataset container %s' % self.userDatasetName) return False # skip DaTRI if skipDaTRI: # successfully terminated self.putLog("skip DaTRI") # update task self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID) else: # get candidates tmpRet, candidateMaps = self.pd2p.getCandidates( self.userDatasetName, checkUsedFile=False, useHidden=True) if not tmpRet: self.endWithError( 'Failed to find candidate for destination') return False # collect all candidates allCandidates = [] for tmpDS, tmpDsVal in candidateMaps.iteritems(): for tmpCloud, tmpCloudVal in tmpDsVal.iteritems(): for tmpSiteName in tmpCloudVal[0]: if not tmpSiteName in allCandidates: allCandidates.append(tmpSiteName) if allCandidates == []: self.endWithError('No candidate for destination') return False # get list of dataset (container) names if eventPickNumSites > 1: # decompose container to transfer datasets separately tmpRet, tmpOut = self.pd2p.getListDatasetReplicasInContainer( self.userDatasetName) if not tmpRet: self.endWithError('Failed to get the size of %s' % self.userDatasetName) return False userDatasetNameList = tmpOut.keys() else: # transfer container at once userDatasetNameList = [self.userDatasetName] # loop over all datasets sitesUsed = [] for tmpUserDatasetName in userDatasetNameList: # get size of dataset container tmpRet, totalInputSize = rucioAPI.getDatasetSize( tmpUserDatasetName) if not tmpRet: self.endWithError('Failed to get the size of %s' % tmpUserDatasetName) return False # run brokerage tmpJob = JobSpec() tmpJob.AtlasRelease = '' self.putLog("run brokerage for %s" % tmpDS) brokerage.broker.schedule([tmpJob], self.taskBuffer, self.siteMapper, True, allCandidates, True, datasetSize=totalInputSize) if tmpJob.computingSite.startswith('ERROR'): self.endWithError('brokerage failed with %s' % tmpJob.computingSite) return False self.putLog("site -> %s" % tmpJob.computingSite) # send transfer request try: tmpDN = rucioAPI.parse_dn(tmpDN) tmpStatus, userInfo = rucioAPI.finger(tmpDN) if not tmpStatus: raise RuntimeError, 'user info not found for {0} with {1}'.format( tmpDN, userInfo) tmpDN = userInfo['nickname'] tmpDQ2ID = self.siteMapper.getSite( tmpJob.computingSite).ddm tmpMsg = "%s ds=%s site=%s id=%s" % ( 'registerDatasetLocation for DaTRI ', tmpUserDatasetName, tmpDQ2ID, tmpDN) self.putLog(tmpMsg) rucioAPI.registerDatasetLocation( tmpDS, [tmpDQ2ID], lifetime=14, owner=tmpDN, activity="User Subscriptions") self.putLog('OK') except: errType, errValue = sys.exc_info()[:2] tmpStr = 'Failed to send transfer request : %s %s' % ( errType, errValue) tmpStr.strip() tmpStr += traceback.format_exc() self.endWithError(tmpStr) return False # list of sites already used sitesUsed.append(tmpJob.computingSite) self.putLog("used %s sites" % len(sitesUsed)) # set candidates if len(sitesUsed) >= eventPickNumSites: # reset candidates to limit the number of sites allCandidates = sitesUsed sitesUsed = [] else: # remove 
site allCandidates.remove(tmpJob.computingSite) # send email notification for success tmpMsg = 'A transfer request was successfully sent to Rucio.\n' tmpMsg += 'Your task will get started once transfer is completed.' self.sendEmail(True, tmpMsg) try: # unlock and delete evp file fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_UN) self.evpFile.close() os.remove(self.evpFileName) except: pass # successfully terminated self.putLog("end %s" % self.evpFileName) return True except: errType, errValue = sys.exc_info()[:2] self.endWithError('Got exception %s:%s %s' % (errType, errValue, traceback.format_exc())) return False
print 'Cloud not known: %s'%cloud cloud = None files={'EVNT.012303._00545.pool.root.1':'rod.cloudtest1'} # UK #'mc12.005802.JF17_pythia_jet_filter.evgen.EVNT.v12003105_tid004541._01035.pool.root.1':'mc12.005802.JF17_pythia_jet_filter.evgen.EVNT.v12003105_tid004541', # CA # 'EVNT.012303._00901.pool.root.1':'mc12.005001.pythia_minbias.evgen.EVNT.v12000701_tid012303', jobList = [] for i in range(1): for lfn in files.keys(): job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = commands.getoutput('uuidgen') job.AtlasRelease = 'Atlas-12.0.7' job.homepackage = 'AtlasProduction/12.0.7.1' # Need different args too # job.AtlasRelease = 'Atlas-13.0.30' # job.homepackage = 'AtlasProduction/13.0.30.2' job.transformation = 'csc_simul_trf.py' job.destinationDBlock = datasetName job.cloud = cloud job.computingSite = site # job.prodDBlock = 'mc12.005001.pythia_minbias.evgen.EVNT.v12000701_tid012303' job.prodDBlock = files[lfn] job.prodSourceLabel = 'test' # job.prodSourceLabel = 'cloudtest'
def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): '''prepare the subjob specific configuration''' # PandaTools from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.')) try: assert self.outsite except: logger.error("outsite not set. Aborting") raise Exception() job.backend.site = self.outsite job.backend.actualCE = self.outsite cloud = job._getRoot().backend.requirements.cloud job.backend.requirements.cloud = cloud # now just filling the job from AthenaMC data jspec = JobSpec() jspec.jobDefinitionID = job._getRoot().id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel if app.transform_archive: jspec.homepackage = 'AnalysisTransforms'+app.transform_archive elif app.prod_release: jspec.homepackage = 'AnalysisTransforms-AtlasProduction_'+str(app.prod_release) jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB #---->???? prodDBlock and destinationDBlock when facing several input / output datasets? jspec.prodDBlock = 'NULL' if job.inputdata and len(app.inputfiles)>0 and app.inputfiles[0] in app.dsetmap: jspec.prodDBlock = app.dsetmap[app.inputfiles[0]] # How to specify jspec.destinationDBlock when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset outdset="" for type in ["EVNT","RDO","HITS","AOD","ESD","NTUP"]: if type in app.outputpaths.keys(): outdset=string.replace(app.outputpaths[type],"/",".") outdset=outdset[1:-1] break if not outdset: try: assert len(app.outputpaths.keys())>0 except: logger.error("app.outputpaths is empty: check your output datasets") raise type=app.outputpaths.keys()[0] outdset=string.replace(app.outputpaths[type],"/",".") outdset=outdset[1:-1] jspec.destinationDBlock = outdset jspec.destinationSE = self.outsite jspec.prodSourceLabel = 'user' jspec.assignedPriority = 1000 jspec.cloud = cloud # memory if job.backend.requirements.memory != -1: jspec.minRamCount = job.backend.requirements.memory jspec.computingSite = self.outsite jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel) # library (source files) flib = FileSpec() flib.lfn = self.library # flib.GUID = flib.type = 'input' # flib.status = flib.dataset = self.libDataset flib.dispatchDBlock = self.libDataset jspec.addFile(flib) # input files FIXME: many more input types for lfn in app.inputfiles: useguid=app.turls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # add dbfiles if any: for lfn in app.dbfiles: useguid=app.dbturls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # then minbias files for lfn in app.mbfiles: useguid=app.minbias_turls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # then cavern 
files for lfn in app.cavernfiles: useguid=app.cavern_turls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # output files( this includes the logfiles) # Output files jidtag="" job = app._getParent() # Returns job or subjob object if job._getRoot().subjobs: jidtag = job._getRoot().id else: jidtag = "%d" % job.id outfiles=app.subjobsOutfiles[job.id] pandaOutfiles={} for type in outfiles.keys(): pandaOutfiles[type]=outfiles[type]+"."+str(jidtag) if type=="LOG": pandaOutfiles[type]+=".tgz" #print pandaOutfiles for outtype in pandaOutfiles.keys(): fout = FileSpec() dset=string.replace(app.outputpaths[outtype],"/",".") dset=dset[1:-1] fout.dataset=dset fout.lfn=pandaOutfiles[outtype] fout.type = 'output' # fout.destinationDBlock = jspec.destinationDBlock fout.destinationDBlock = fout.dataset fout.destinationSE = jspec.destinationSE if outtype=='LOG': fout.type='log' fout.destinationDBlock = fout.dataset fout.destinationSE = job.backend.site jspec.addFile(fout) # job parameters param = '-l %s ' % self.library # user tarball. # use corruption checker if job.backend.requirements.corCheck: param += '--corCheck ' # disable to skip missing files if job.backend.requirements.notSkipMissing: param += '--notSkipMissing ' # transform parameters # need to update arglist with final output file name... newArgs=[] if app.mode == "evgen": app.args[3]=app.args[3]+" -t " if app.verbosity: app.args[3]=app.args[3]+" -l %s " % app.verbosity for arg in app.args[3:]: for type in outfiles.keys(): if arg.find(outfiles[type])>-1: arg=arg.replace(outfiles[type],pandaOutfiles[type]) newArgs.append(arg) arglist=string.join(newArgs," ") # print "Arglist:",arglist param += ' -r ./ ' param += ' -j "%s"' % urllib.quote(arglist) allinfiles=app.inputfiles+app.dbfiles # Input files. param += ' -i "%s" ' % allinfiles if len(app.mbfiles)>0: param+= ' -m "%s" ' % app.mbfiles if len(app.cavernfiles)>0: param+= ' -n "%s" ' % app.cavernfiles # param += '-m "[]" ' #%minList FIXME # param += '-n "[]" ' #%cavList FIXME del pandaOutfiles["LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore... param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items()) # source URL matchURL = re.search("(http.*://[^/]+)/",Client.baseURLSSL) if matchURL != None: param += " --sourceURL %s " % matchURL.group(1) param += " --trf" jspec.jobParameters = param jspec.metadata="--trf \"%s\"" % arglist #print "SUBJOB DETAILS:",jspec.values() if app.dryrun: print "job.application.dryrun activated, printing out job parameters" print jspec.values() return return jspec
import time import commands import userinterface.Client as Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec site = sys.argv[1] cloud = sys.argv[2] datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') destName = None jobList = [] for i in range(1): job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i) job.AtlasRelease = 'Atlas-15.6.10' job.homepackage = 'AtlasProduction/15.6.10.1' job.transformation = 'Evgen_trf.py' job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = 10000 job.prodSourceLabel = 'test' job.computingSite = site job.cloud = cloud job.cmtConfig = 'i686-slc5-gcc43-opt' file = FileSpec() file.lfn = "%s.evgen.pool.root" % job.jobName
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig): """Prepare the specific aspec of each subjob. Returns: subjobconfig list of objects understood by backends.""" from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs # make sure we have the correct siteType refreshPandaSpecs() job = app._getParent() masterjob = job._getRoot() logger.debug('ProdTransPandaRTHandler prepare called for %s', job.getFQID('.')) job.backend.actualCE = job.backend.site job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud'] # check that the site is in a submit-able status if not job.splitter or job.splitter._name != 'DQ2JobSplitter': allowed_sites = job.backend.list_ddm_sites() try: outDsLocation = Client.PandaSites[job.backend.site]['ddm'] tmpDsExist = False if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')): #if Client.getDatasets(job.outputdata.datasetname): if getDatasets(job.outputdata.datasetname): tmpDsExist = True logger.info('Re-using output dataset %s'%job.outputdata.datasetname) if not configPanda['specialHandling']=='ddm:rucio' and not configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'): Client.addDataset(job.outputdata.datasetname,False,location=outDsLocation,allowProdDisk=True,dsExist=tmpDsExist) logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,outDsLocation)) dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation) except exceptions.SystemExit: raise BackendError('Panda','Exception in adding dataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1])) # JobSpec. jspec = JobSpec() jspec.currentPriority = app.priority jspec.jobDefinitionID = masterjob.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.coreCount = app.core_count jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release jspec.homepackage = app.home_package jspec.transformation = app.transformation jspec.destinationDBlock = job.outputdata.datasetname if job.outputdata.location: jspec.destinationSE = job.outputdata.location else: jspec.destinationSE = job.backend.site if job.inputdata: jspec.prodDBlock = job.inputdata.dataset[0] else: jspec.prodDBlock = 'NULL' if app.prod_source_label: jspec.prodSourceLabel = app.prod_source_label else: jspec.prodSourceLabel = configPanda['prodSourceLabelRun'] jspec.processingType = configPanda['processingType'] jspec.specialHandling = configPanda['specialHandling'] jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud jspec.cmtConfig = app.atlas_cmtconfig if app.dbrelease == 'LATEST': try: latest_dbrelease = getLatestDBReleaseCaching() except: from pandatools import Client latest_dbrelease = Client.getLatestDBRelease() m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease) if m: self.dbrelease_dataset = m.group(1) self.dbrelease = m.group(2) else: raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. 
Try setting application.dbrelease manually.") else: self.dbrelease_dataset = app.dbrelease_dataset self.dbrelease = app.dbrelease jspec.jobParameters = app.job_parameters if self.dbrelease: if self.dbrelease == 'current': jspec.jobParameters += ' --DBRelease=current' else: if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,) else: jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,) dbspec = FileSpec() dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease dbspec.dataset = self.dbrelease_dataset dbspec.prodDBlock = jspec.prodDBlock dbspec.type = 'input' jspec.addFile(dbspec) if job.inputdata: m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)', job.inputdata.dataset[0]) if not m: logger.error("Error retrieving run number from dataset name") #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name") runnumber = 105200 else: runnumber = int(m.group(2)) if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --runNumber %d' % runnumber else: jspec.jobParameters += ' RunNumber=%d' % runnumber # Output files. randomized_lfns = [] ilfn = 0 for lfn, lfntype in zip(app.output_files,app.output_type): ofspec = FileSpec() if app.randomize_lfns: randomized_lfn = lfn + ('.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4] ) ) else: randomized_lfn = lfn ofspec.lfn = randomized_lfn randomized_lfns.append(randomized_lfn) ofspec.destinationDBlock = jspec.destinationDBlock ofspec.destinationSE = jspec.destinationSE ofspec.dataset = jspec.destinationDBlock ofspec.type = 'output' jspec.addFile(ofspec) if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --output%sFile %s' % (lfntype, randomized_lfns[ilfn]) else: jspec.jobParameters += ' output%sFile=%s' % (lfntype, randomized_lfns[ilfn]) ilfn=ilfn+1 # Input files. if job.inputdata: for guid, lfn, size, checksum, scope in zip(job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes): ifspec = FileSpec() ifspec.lfn = lfn ifspec.GUID = guid ifspec.fsize = size ifspec.md5sum = checksum ifspec.scope = scope ifspec.dataset = jspec.prodDBlock ifspec.prodDBlock = jspec.prodDBlock ifspec.type = 'input' jspec.addFile(ifspec) if app.input_type: itype = app.input_type else: itype = m.group(5) if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(job.inputdata.names)) else: jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(job.inputdata.names)) # Log files. lfspec = FileSpec() lfspec.lfn = '%s.job.log.tgz' % jspec.jobName lfspec.destinationDBlock = jspec.destinationDBlock lfspec.destinationSE = jspec.destinationSE lfspec.dataset = jspec.destinationDBlock lfspec.type = 'log' jspec.addFile(lfspec) return jspec
def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue, resource_name): # list with a lock inputListWorld = ListWithLock([]) # variables for submission maxBunchTask = 100 # make logger tmpLog = MsgWrapper(logger) tmpLog.debug('start doBrokerage') # return for failure retFatal = self.SC_FATAL retTmpError = self.SC_FAILED tmpLog.debug( 'vo={0} label={1} queue={2} resource_name={3} nTasks={4}'.format( vo, prodSourceLabel, workQueue.queue_name, resource_name, len(inputList))) # loop over all tasks allRwMap = {} prioMap = {} tt2Map = {} expRWs = {} jobSpecList = [] for tmpJediTaskID, tmpInputList in inputList: for taskSpec, cloudName, inputChunk in tmpInputList: # collect tasks for WORLD if taskSpec.useWorldCloud(): inputListWorld.append((taskSpec, inputChunk)) continue # make JobSpec to be submitted for TaskAssigner jobSpec = JobSpec() jobSpec.taskID = taskSpec.jediTaskID jobSpec.jediTaskID = taskSpec.jediTaskID # set managed to trigger TA jobSpec.prodSourceLabel = 'managed' jobSpec.processingType = taskSpec.processingType jobSpec.workingGroup = taskSpec.workingGroup jobSpec.metadata = taskSpec.processingType jobSpec.assignedPriority = taskSpec.taskPriority jobSpec.currentPriority = taskSpec.currentPriority jobSpec.maxDiskCount = ( taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) // 1024 // 1024 if taskSpec.useWorldCloud(): # use destinationSE to trigger task brokerage in WORLD cloud jobSpec.destinationSE = taskSpec.cloud prodDBlock = None setProdDBlock = False for datasetSpec in inputChunk.getDatasets(): prodDBlock = datasetSpec.datasetName if datasetSpec.isMaster(): jobSpec.prodDBlock = datasetSpec.datasetName setProdDBlock = True for fileSpec in datasetSpec.Files: tmpInFileSpec = fileSpec.convertToJobFileSpec( datasetSpec) jobSpec.addFile(tmpInFileSpec) # use secondary dataset name as prodDBlock if setProdDBlock is False and prodDBlock is not None: jobSpec.prodDBlock = prodDBlock # append jobSpecList.append(jobSpec) prioMap[jobSpec.taskID] = jobSpec.currentPriority tt2Map[jobSpec.taskID] = jobSpec.processingType # get RW for a priority if jobSpec.currentPriority not in allRwMap: tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI( vo, prodSourceLabel, workQueue, jobSpec.currentPriority) if tmpRW is None: tmpLog.error( 'failed to calculate RW with prio={0}'.format( jobSpec.currentPriority)) return retTmpError allRwMap[jobSpec.currentPriority] = tmpRW # get expected RW expRW = self.taskBufferIF.calculateTaskRW_JEDI( jobSpec.jediTaskID) if expRW is None: tmpLog.error( 'failed to calculate RW for jediTaskID={0}'.format( jobSpec.jediTaskID)) return retTmpError expRWs[jobSpec.taskID] = expRW # for old clouds if jobSpecList != []: # get fullRWs fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI( vo, prodSourceLabel, None, None) if fullRWs is None: tmpLog.error('failed to calculate full RW') return retTmpError # set metadata for jobSpec in jobSpecList: rwValues = allRwMap[jobSpec.currentPriority] jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % ( jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap), str(fullRWs), str(tt2Map)) tmpLog.debug('run task assigner for {0} tasks'.format( len(jobSpecList))) nBunchTask = 0 while nBunchTask < len(jobSpecList): # get a bunch jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask] strIDs = 'jediTaskID=' for tmpJobSpec in jobsBunch: strIDs += '{0},'.format(tmpJobSpec.taskID) strIDs = strIDs[:-1] tmpLog.debug(strIDs) # increment index nBunchTask += maxBunchTask # run task brokerge stS, outSs = PandaClient.runTaskAssignment(jobsBunch) 
tmpLog.debug('{0}:{1}'.format(stS, str(outSs))) # for WORLD if len(inputListWorld) > 0: # thread pool threadPool = ThreadPool() # get full RW for WORLD fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI( vo, prodSourceLabel, None, None) if fullRWs is None: tmpLog.error('failed to calculate full WORLD RW') return retTmpError # get RW per priority for taskSpec, inputChunk in inputListWorld: if taskSpec.currentPriority not in allRwMap: tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI( vo, prodSourceLabel, workQueue, taskSpec.currentPriority) if tmpRW is None: tmpLog.error( 'failed to calculate RW with prio={0}'.format( taskSpec.currentPriority)) return retTmpError allRwMap[taskSpec.currentPriority] = tmpRW # live counter for RWs liveCounter = MapWithLock(allRwMap) # make workers ddmIF = self.ddmIF.getInterface(vo) for iWorker in range(4): thr = AtlasProdTaskBrokerThread(inputListWorld, threadPool, self.taskBufferIF, ddmIF, fullRWs, liveCounter, workQueue) thr.start() threadPool.join(60 * 10) # return tmpLog.debug('doBrokerage done') return self.SC_SUCCEEDED
else: site = None datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') destName = 'BNL_ATLAS_2' files = { 'EVNT.019128._00011.pool.root.1': None, } jobList = [] index = 0 for lfn in files.keys(): index += 1 job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index) job.AtlasRelease = 'Atlas-13.0.40' job.homepackage = 'AtlasProduction/13.0.40.3' job.transformation = 'csc_simul_trf.py' job.destinationDBlock = datasetName job.destinationSE = destName job.computingSite = site job.prodDBlock = 'valid1.005001.pythia_minbias.evgen.EVNT.e306_tid019128' job.prodSourceLabel = 'test' job.currentPriority = 10000 job.cloud = 'IT' fileI = FileSpec()
def createJobSpec(self, task, outdataset, job, jobset, jobdef, site, jobname, lfnhanger, allsites, jobid): """Create a spec for one job :arg TaskWorker.DataObject.Task task: the task to work on :arg str outdataset: the output dataset name where all the produced files will be placed :arg WMCore.DataStructs.Job job: the abstract job :arg int jobset: the PanDA jobset corresponding to the current task :arg int jobdef: the PanDA jobdef where to append the current jobs --- not used :arg str site: the borkered site where to run the jobs :arg str jobname: the job name :arg str lfnhanger: the random string to be added in the output file name :arg list str allsites: all possible sites where the job can potentially run :arg int jobid: incremental job number :return: the sepc object.""" pandajob = JobSpec() ## always setting a job definition ID pandajob.jobDefinitionID = jobdef if jobdef else -1 ## always setting a job set ID pandajob.jobsetID = jobset if jobset else -1 pandajob.jobName = jobname pandajob.prodUserID = task['tm_user_dn'] pandajob.destinationDBlock = outdataset pandajob.prodDBlock = task['tm_input_dataset'] pandajob.prodSourceLabel = 'user' pandajob.computingSite = site pandajob.cloud = getSite(pandajob.computingSite) pandajob.destinationSE = 'local' pandajob.transformation = task['tm_transformation'] ## need to initialize this pandajob.metadata = '' def outFileSpec(of=None, log=False): """Local routine to create an FileSpec for the an job output/log file :arg str of: output file base name :return: FileSpec object for the output file.""" outfile = FileSpec() if log: outfile.lfn = "job.log_%d_%s.tgz" % (jobid, lfnhanger) outfile.type = 'log' else: outfile.lfn = '%s_%d_%s%s' %(os.path.splitext(of)[0], jobid, lfnhanger, os.path.splitext(of)[1]) outfile.type = 'output' outfile.destinationDBlock = pandajob.destinationDBlock outfile.destinationSE = task['tm_asyncdest'] outfile.dataset = pandajob.destinationDBlock return outfile alloutfiles = [] outjobpar = {} outfilestring = '' for outputfile in task['tm_outfiles']: outfilestring += '%s,' % outputfile filespec = outFileSpec(outputfile) alloutfiles.append(filespec) #pandajob.addFile(filespec) outjobpar[outputfile] = filespec.lfn for outputfile in task['tm_tfile_outfiles']: outfilestring += '%s,' % outputfile filespec = outFileSpec(outputfile) alloutfiles.append(filespec) #pandajob.addFile(filespec) outjobpar[outputfile] = filespec.lfn for outputfile in task['tm_edm_outfiles']: outfilestring += '%s,' % outputfile filespec = outFileSpec(outputfile) alloutfiles.append(filespec) #pandajob.addFile(filespec) outjobpar[outputfile] = filespec.lfn outfilestring = outfilestring[:-1] infiles = [] for inputfile in job['input_files']: infiles.append( inputfile['lfn'] ) pandajob.jobParameters = '-a %s ' % task['tm_user_sandbox'] pandajob.jobParameters += '--sourceURL %s ' % task['tm_cache_url'] pandajob.jobParameters += '--jobNumber=%s ' % jobid pandajob.jobParameters += '--cmsswVersion=%s ' % task['tm_job_sw'] pandajob.jobParameters += '--scramArch=%s ' % task['tm_job_arch'] pandajob.jobParameters += '--inputFile=\'%s\' ' % json.dumps(infiles) self.jobParametersSetting(pandajob, job, self.jobtypeMapper[task['tm_job_type']]) pandajob.jobParameters += '-o "%s" ' % str(outjobpar) pandajob.jobParameters += '--dbs_url=%s ' % task['tm_dbs_url'] pandajob.jobParameters += '--publish_dbs_url=%s ' % task['tm_publish_dbs_url'] pandajob.jobParameters += '--publishFiles=%s ' % ('True' if task['tm_publication'] == 'T' else 'False') pandajob.jobParameters += 
'--saveLogs=%s ' % ('True' if task['tm_save_logs'] == 'T' else 'False') pandajob.jobParameters += '--availableSites=\'%s\' ' %json.dumps(allsites) pandajob.jobParameters += '--group=%s ' % (task['tm_user_group'] if task['tm_user_group'] else '') pandajob.jobParameters += '--role=%s ' % (task['tm_user_role'] if task['tm_user_role'] else '') self.logger.info(type(task['tm_user_infiles'])) self.logger.info(task['tm_user_infiles']) if task['tm_user_infiles']: addinfilestring = '' for addinfile in task['tm_user_infiles']: addinfilestring += '%s,' % addinfile pandajob.jobParameters += '--userFiles=%s ' % ( addinfilestring[:-1] ) pandajob.jobName = '%s' % task['tm_taskname'] #Needed by ASO and Dashboard if 'panda_oldjobid' in job and job['panda_oldjobid']: pandajob.parentID = job['panda_oldjobid'] pandajob.addFile(outFileSpec(log=True)) for filetoadd in alloutfiles: pandajob.addFile(filetoadd) return pandajob
def master_prepare(self,app,appconfig): '''Prepare the master job''' from pandatools import Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.')) # set chirp variables if configPanda['chirpconfig'] or configPanda['chirpserver']: setChirpVariables() # Pack inputsandbox inputsandbox = 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null') inpw = job.getInputWorkspace() # add user script to inputsandbox if hasattr(job.application.exe, "name"): if not job.application.exe in job.inputsandbox: job.inputsandbox.append(job.application.exe) for fname in [f.name for f in job.inputsandbox]: fname.rstrip(os.sep) path = fname[:fname.rfind(os.sep)] f = fname[fname.rfind(os.sep)+1:] rc, output = commands.getstatusoutput('tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f)) if rc: logger.error('Packing inputsandbox failed with status %d',rc) logger.error(output) raise ApplicationConfigurationError('Packing inputsandbox failed.') if len(job.inputsandbox) > 0: rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox))) if rc: logger.error('Packing inputsandbox failed with status %d',rc) logger.error(output) raise ApplicationConfigurationError('Packing inputsandbox failed.') inputsandbox += ".gz" else: inputsandbox = None # Upload Inputsandbox if inputsandbox: logger.debug('Uploading source tarball ...') uploadSources(inpw.getPath(),os.path.basename(inputsandbox)) self.inputsandbox = inputsandbox else: self.inputsandbox = None # input dataset if job.inputdata: if job.inputdata._name != 'DQ2Dataset': raise ApplicationConfigurationError('PANDA application supports only DQ2Datasets') # run brokerage here if not splitting if not job.splitter: from GangaPanda.Lib.Panda.Panda import runPandaBrokerage runPandaBrokerage(job) elif job.splitter._name not in ['DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask']: raise ApplicationConfigurationError('Panda splitter must be DQ2JobSplitter or ArgSplitter') if job.backend.site == 'AUTO': raise ApplicationConfigurationError('site is still AUTO after brokerage!') # output dataset if job.outputdata: if job.outputdata._name != 'DQ2OutputDataset': raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset') else: logger.info('Adding missing DQ2OutputDataset') job.outputdata = DQ2OutputDataset() job.outputdata.datasetname,outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname) self.outDsLocation = Client.PandaSites[job.backend.site]['ddm'] try: Client.addDataset(job.outputdata.datasetname,False,location=self.outDsLocation) logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,self.outDsLocation)) dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation) except exceptions.SystemExit: raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1])) # handle the libds if job.backend.libds: self.libDataset = job.backend.libds self.fileBO = getLibFileSpecFromLibDS(self.libDataset) self.library = self.fileBO.lfn elif job.backend.bexec: self.libDataset = job.outputdata.datasetname+'.lib' self.library = '%s.tgz' % self.libDataset try: Client.addDataset(self.libDataset,False,location=self.outDsLocation) dq2_set_dataset_lifetime(self.libDataset, location=self.outDsLocation) logger.info('Lib dataset %s registered at 
%s'%(self.libDataset,self.outDsLocation)) except exceptions.SystemExit: raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(self.libDataset,sys.exc_info()[0],sys.exc_info()[1])) # collect extOutFiles self.extOutFile = [] for tmpName in job.outputdata.outputdata: if tmpName != '': self.extOutFile.append(tmpName) for tmpName in job.outputsandbox: if tmpName != '': self.extOutFile.append(tmpName) for tmpName in job.backend.extOutFile: if tmpName != '': self.extOutFile.append(tmpName) # create build job if job.backend.bexec != '': jspec = JobSpec() jspec.jobDefinitionID = job.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.transformation = '%s/buildGen-00-00-01' % Client.baseURLSUB if Client.isDQ2free(job.backend.site): jspec.destinationDBlock = '%s/%s' % (job.outputdata.datasetname,self.libDataset) jspec.destinationSE = 'local' else: jspec.destinationDBlock = self.libDataset jspec.destinationSE = job.backend.site jspec.prodSourceLabel = configPanda['prodSourceLabelBuild'] jspec.processingType = configPanda['processingType'] jspec.assignedPriority = configPanda['assignedPriorityBuild'] jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud jspec.jobParameters = '-o %s' % (self.library) if self.inputsandbox: jspec.jobParameters += ' -i %s' % (self.inputsandbox) else: raise ApplicationConfigurationError('Executable on Panda with build job defined, but inputsandbox is emtpy !') matchURL = re.search('(http.*://[^/]+)/',Client.baseURLCSRVSSL) if matchURL: jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1) if job.backend.bexec != '': jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec) jspec.jobParameters += ' -r %s ' % '.' fout = FileSpec() fout.lfn = self.library fout.type = 'output' fout.dataset = self.libDataset fout.destinationDBlock = self.libDataset jspec.addFile(fout) flog = FileSpec() flog.lfn = '%s.log.tgz' % self.libDataset flog.type = 'log' flog.dataset = self.libDataset flog.destinationDBlock = self.libDataset jspec.addFile(flog) return jspec else: return None
import time import commands import userinterface.Client as Client from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec site = sys.argv[1] cloud = sys.argv[2] datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen') destName = None jobList = [] for i in range(1): job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i) job.AtlasRelease = 'Atlas-17.0.5' job.homepackage = 'AtlasProduction/17.0.5.6' job.transformation = 'Evgen_trf.py' job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = 10000 job.prodSourceLabel = 'test' job.computingSite = site job.cloud = cloud job.cmtConfig = 'i686-slc5-gcc43-opt' file = FileSpec() file.lfn = "%s.evgen.pool.root" % job.jobName
if len(sys.argv) == 2:
    site = sys.argv[1]
    cloud = 'CA'
elif len(sys.argv) == 3:
    site = sys.argv[1]
    cloud = sys.argv[2]
else:
    site = None
    cloud = None

datasetName = 'panda.destDB.%s_tid999991' % commands.getoutput('uuidgen')
taskid = 999989

jobList = []
for i in range(1):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    # job.AtlasRelease = 'Atlas-12.0.6'
    # job.homepackage = 'AtlasProduction/12.0.6.5'
    job.AtlasRelease = 'Atlas-12.0.7'
    job.homepackage = 'AtlasProduction/12.0.7.1'
    job.transformation = 'csc_evgen_trf.py'
    job.destinationDBlock = datasetName
    # job.destinationSE = destName
    # job.cloud = 'CA'
    job.cloud = cloud
    job.taskID = taskid
    job.currentPriority = 1000
    job.prodSourceLabel = 'test'
def run(self): try: while True: _logger.debug('%s start' % self.pandaID) # query job job = self.taskBuffer.peekJobs([self.pandaID],fromDefined=False, fromArchived=False,fromWaiting=False)[0] _logger.debug('%s in %s' % (self.pandaID, job.jobStatus)) # check job status if job == None: _logger.debug('%s escape : not found' % self.pandaID) return if not job.jobStatus in ['running','sent','starting','holding', 'stagein','stageout']: if job.jobStatus == 'transferring' and (job.prodSourceLabel in ['user','panda'] or job.jobSubStatus not in [None, 'NULL', '']): pass else: _logger.debug('%s escape : %s' % (self.pandaID,job.jobStatus)) return # time limit timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=self.sleepTime) if job.modificationTime < timeLimit or (job.endTime != 'NULL' and job.endTime < timeLimit): _logger.debug('%s %s lastmod:%s endtime:%s' % (job.PandaID,job.jobStatus, str(job.modificationTime), str(job.endTime))) destDBList = [] if job.jobStatus == 'sent': # sent job didn't receive reply from pilot within 30 min job.jobDispatcherErrorCode = ErrorCode.EC_SendError job.jobDispatcherErrorDiag = "Sent job didn't receive reply from pilot within 30 min" elif job.exeErrorDiag == 'NULL' and job.pilotErrorDiag == 'NULL': # lost heartbeat job.jobDispatcherErrorCode = ErrorCode.EC_Watcher if job.jobDispatcherErrorDiag == 'NULL': if job.endTime == 'NULL': # normal lost heartbeat job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(job.modificationTime) else: # job recovery failed job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(job.endTime) if job.jobStatus == 'transferring': job.jobDispatcherErrorDiag += ' in transferring' # get worker workerSpecs = self.taskBuffer.getWorkersForJob(job.PandaID) if len(workerSpecs) > 0: workerSpec = workerSpecs[0] if workerSpec.status in ['finished', 'failed', 'cancelled', 'missed']: job.supErrorCode = SupErrors.error_codes['WORKER_ALREADY_DONE'] job.supErrorDiag = 'worker already {0} at {1} with {2}'.format(workerSpec.status, str(workerSpec.endTime), workerSpec.diagMessage) job.supErrorDiag = JobSpec.truncateStringAttr('supErrorDiag', job.supErrorDiag) else: # job recovery failed job.jobDispatcherErrorCode = ErrorCode.EC_Recovery job.jobDispatcherErrorDiag = 'job recovery failed for %s hours' % (self.sleepTime/60) # set job status job.jobStatus = 'failed' # set endTime for lost heartbeat if job.endTime == 'NULL': # normal lost heartbeat job.endTime = job.modificationTime # set files status for file in job.Files: if file.type == 'output' or file.type == 'log': file.status = 'failed' if not file.destinationDBlock in destDBList: destDBList.append(file.destinationDBlock) # event service if EventServiceUtils.isEventServiceJob(job) and not EventServiceUtils.isJobCloningJob(job): eventStat = self.taskBuffer.getEventStat(job.jediTaskID, job.PandaID) # set sub status when no sucessful events if EventServiceUtils.ST_finished not in eventStat: job.jobSubStatus = 'es_heartbeat' # update job self.taskBuffer.updateJobs([job],False) # start closer if job.jobStatus == 'failed': source = 'jobDispatcherErrorCode' error_code = job.jobDispatcherErrorCode error_diag = job.jobDispatcherErrorDiag try: _logger.debug("Watcher will call apply_retrial_rules") retryModule.apply_retrial_rules(self.taskBuffer, job.PandaID, source, error_code, error_diag, job.attemptNr) _logger.debug("apply_retrial_rules is back") except Exception as e: _logger.debug("apply_retrial_rules excepted and needs to be investigated (%s): %s"%(e, traceback.format_exc())) # updateJobs was 
successful and it failed a job with taskBufferErrorCode try: _logger.debug("Watcher.run will peek the job") job_tmp = self.taskBuffer.peekJobs([job.PandaID], fromDefined=False, fromArchived=True, fromWaiting=False)[0] if job_tmp.taskBufferErrorCode: source = 'taskBufferErrorCode' error_code = job_tmp.taskBufferErrorCode error_diag = job_tmp.taskBufferErrorDiag _logger.debug("Watcher.run 2 will call apply_retrial_rules") retryModule.apply_retrial_rules(self.taskBuffer, job_tmp.PandaID, source, error_code, error_diag, job_tmp.attemptNr) _logger.debug("apply_retrial_rules 2 is back") except IndexError: pass except Exception as e: self.logger.error("apply_retrial_rules 2 excepted and needs to be investigated (%s): %s" % (e, traceback.format_exc())) cThr = Closer(self.taskBuffer,destDBList,job) cThr.start() cThr.join() _logger.debug('%s end' % job.PandaID) return # single action if self.single: return # sleep time.sleep(60*self.sleepTime) except: type, value, traceBack = sys.exc_info() _logger.error("run() : %s %s" % (type,value)) return