Example #1
    def createJob(self,
                  name,
                  nodes,
                  walltime,
                  command,
                  inputs=None,
                  queuename=None):
        job = JobSpec()
        job.jobDefinitionID = int(time.time()) % 10000
        job.jobName = "%s" % commands.getoutput('uuidgen')
        job.VO = self.vo
        job.transformation = self.transformation

        job.destinationDBlock = self.datasetName
        job.destinationSE = self.destName
        job.currentPriority = self.currentPriority
        job.prodSourceLabel = self.prodSourceLabel
        job.computingSite = self.site if queuename is None else queuename

        # pack the LQCD task description into the job parameters as JSON
        lqcd_command = {
            "nodes": nodes,
            "walltime": walltime,
            "name": name,
            "command": command
        }

        job.jobParameters = json.dumps(lqcd_command)

        # attach the mandatory log-file spec
        fileOL = FileSpec()
        fileOL.lfn = "%s.job.log.tgz" % job.jobName
        fileOL.destinationDBlock = job.destinationDBlock
        fileOL.destinationSE = job.destinationSE
        fileOL.dataset = job.destinationDBlock
        fileOL.type = 'log'
        job.addFile(fileOL)
        # note: the 'inputs' argument is carried in the cmtConfig field here
        job.cmtConfig = inputs

        return job
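
A possible usage sketch for this helper; 'factory' stands for an instance of whatever class defines createJob, the LQCD argument values are invented, and the submission call simply mirrors the Client.submitJobs pattern used in the other examples on this page:

import userinterface.Client as Client

# hypothetical caller: build one job with illustrative LQCD parameters
# and submit it through the PanDA client
job = factory.createJob(name='lqcd_test',
                        nodes=2,
                        walltime='02:00:00',
                        command='./run_lqcd.sh')
s, o = Client.submitJobs([job])
print s
for x in o:
    print "PandaID=%s" % x[0]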
Example #2
import sys
import getpass
import commands

from taskbuffer.JobSpec     import JobSpec
from taskbuffer.FileSpec    import FileSpec
from taskbuffer.DatasetSpec import DatasetSpec
from taskbuffer.DBProxyPool import DBProxyPool

passwd = getpass.getpass()

pool = DBProxyPool('adbpro.usatlas.bnl.gov',passwd,2)

proxy = pool.getProxy()

job1 = JobSpec()
job1.PandaID='NULL'
job1.jobStatus='unknown'
job1.computingSite="aaa"
f11 = FileSpec()
f11.lfn = 'in1.pool.root'
f11.type = 'input'
job1.addFile(f11)
f12 = FileSpec()
f12.lfn = 'out1.pool.root'
f12.type = 'output'
job1.addFile(f12)

job2 = JobSpec()
job2.PandaID='NULL'
job2.jobStatus='unknown'
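
Example #2 breaks off while the second job is being filled in. A symmetric completion for job2, mirroring job1 exactly (the original presumably went on to exercise the proxy, which is not guessed at here):

job2.computingSite="bbb"
f21 = FileSpec()
f21.lfn = 'in2.pool.root'
f21.type = 'input'
job2.addFile(f21)
f22 = FileSpec()
f22.lfn = 'out2.pool.root'
f22.type = 'output'
job2.addFile(f22)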
Example #3
 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue):
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug("start doBrokerage")
     # return for failure
     retFatal = self.SC_FATAL
     retTmpError = self.SC_FAILED
     tmpLog.debug("vo={0} label={1} queue={2}".format(vo, prodSourceLabel, workQueue.queue_name))
     # loop over all tasks
     allRwMap = {}
     prioMap = {}
     tt2Map = {}
     expRWs = {}
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = "managed"
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue, jobSpec.currentPriority
                 )
                 if tmpRW is None:
                     tmpLog.error("failed to calculate RW with prio={0}".format(jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error("failed to calculate RW for jediTaskID={0}".format(jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # get fullRWs
     fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo, prodSourceLabel, None, None)
     if fullRWs is None:
         tmpLog.error("failed to calculate full RW")
         return retTmpError
     # set metadata
     for jobSpec in jobSpecList:
         rwValues = allRwMap[jobSpec.currentPriority]
         jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
             jobSpec.metadata,
             str(rwValues),
             str(expRWs),
             str(prioMap),
             str(fullRWs),
             str(tt2Map),
         )
     tmpLog.debug("run task assigner for {0} tasks".format(len(jobSpecList)))
     nBunchTask = 0
     while nBunchTask < len(jobSpecList):
         # get a bunch
         jobsBunch = jobSpecList[nBunchTask : nBunchTask + maxBunchTask]
         strIDs = "jediTaskID="
         for tmpJobSpec in jobsBunch:
             strIDs += "{0},".format(tmpJobSpec.taskID)
         strIDs = strIDs[:-1]
         tmpLog.debug(strIDs)
         # increment index
         nBunchTask += maxBunchTask
         # run task brokerage
         stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
         tmpLog.debug("{0}:{1}".format(stS, str(outSs)))
     # return
     tmpLog.debug("done")
     return self.SC_SUCCEEDED
Example #4
 def doBrokerage(self,inputList,vo,prodSourceLabel,workQueue):
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retFatal    = self.SC_FATAL
     retTmpError = self.SC_FAILED
     tmpLog.debug('vo={0} label={1} queue={2} nTasks={3}'.format(vo,prodSourceLabel,
                                                                 workQueue.queue_name,
                                                                 len(inputList)))
     # loop over all tasks
     allRwMap    = {}
     prioMap     = {}
     tt2Map      = {}
     expRWs      = {}
     jobSpecList = []
     for tmpJediTaskID,tmpInputList in inputList:
         for taskSpec,cloudName,inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec,inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID     = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel  = 'managed'
             jobSpec.processingType   = taskSpec.processingType
             jobSpec.workingGroup     = taskSpec.workingGroup
             jobSpec.metadata         = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority  = taskSpec.currentPriority
             jobSpec.maxDiskCount     = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 # (unreachable here: WORLD tasks were already diverted above)
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID]  = jobSpec.processingType
             # get RW for a priority
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                    jobSpec.currentPriority) 
                 if tmpRW is None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error('failed to calculate RW for jediTaskID={0}'.format(jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (jobSpec.metadata,
                                                       str(rwValues),str(expRWs),
                                                       str(prioMap),str(fullRWs),
                                                       str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(len(jobSpecList)))
         nBunchTask = 0
         while nBunchTask < len(jobSpecList):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask+maxBunchTask]
             strIDs = 'jediTaskID='
             for tmpJobSpec in jobsBunch:
                 strIDs += '{0},'.format(tmpJobSpec.taskID)
             strIDs = strIDs[:-1]
             tmpLog.debug(strIDs)
             # increment index
             nBunchTask += maxBunchTask
             # run task brokerage
             stS,outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS,str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority
         for taskSpec,inputChunk in inputListWorld:
             if taskSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                         taskSpec.currentPriority)
                  if tmpRW is None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for iWorker in range(4):
             thr = AtlasProdTaskBrokerThread(inputListWorld,threadPool,
                                             self.taskBufferIF,ddmIF,
                                             fullRWs,liveCounter)
             thr.start()
         threadPool.join(60*10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED
Example #5
    if format == 'HITS':
        step = 'simul'
    # append
    oDatasets.append('%s.%s.%s.%s_tid%06d' %
                     (m.group(1), step, format, m.group(3), int(taskID)))

# log dataset
lDataset = '%s.%s.%s.%s_tid%06d' % (m.group(1), m.group(2), 'log', m.group(3),
                                    int(taskID))

# instantiate JobSpecs
iJob = 0
jobList = []
for line in taskFile:
    iJob += 1
    job = JobSpec()
    # job ID  ###### FIXME
    job.jobDefinitionID = int(time.time()) % 10000
    # job name
    job.jobName = "%s_%05d.job" % (taskName, iJob)
    # AtlasRelease
    if len(re.findall(r'\.', trfVer)) > 2:
        match = re.search(r'^(\d+\.\d+\.\d+)', trfVer)
        job.AtlasRelease = 'Atlas-%s' % match.group(1)
    else:
        job.AtlasRelease = 'Atlas-%s' % trfVer
    # homepackage
    vers = trfVer.split('.')
    if int(vers[0]) <= 11:
        job.homepackage = 'JobTransforms'
        for ver in vers:
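
The loop is cut off at this point. A plausible reconstruction of the remainder, assuming the usual PanDA test-script pattern for pre-12 releases (a hedged sketch, not taken from this source):

            # hypothetical continuation: build e.g. 'JobTransforms-11-00-42'
            job.homepackage += '-%02d' % int(ver)
    else:
        job.homepackage = 'AtlasProduction/%s' % trfVer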
Example #6
import sys
import time
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv)>1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName    = 'BNL_ATLAS_2'

jobList = []
for i in range(20):
    
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = commands.getoutput('uuidgen') 
    job.AtlasRelease      = 'Atlas-11.0.41'
    #job.AtlasRelease      = 'Atlas-11.0.3'
    job.homepackage       = 'AnalysisTransforms'
    job.transformation    = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.currentPriority   = 100
    job.prodSourceLabel   = 'user'
    job.computingSite     = site
    #job.prodDBlock        = "pandatest.b1599dfa-cd36-4fc5-92f6-495781a94c66"
    job.prodDBlock        = "pandatest.f228b051-077b-4f81-90bf-496340644379"
    
    fileI = FileSpec()
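
The snippet ends as the input FileSpec is created. A plausible completion, assuming the file wiring shown in Examples 1 and 11 and the submission pattern of Example 15 (the input LFN is a placeholder):

    # hypothetical completion: attach the input and log files, queue the job
    fileI.lfn = 'pandatest.input._%05d.root' % i
    fileI.dataset = job.prodDBlock
    fileI.type = 'input'
    job.addFile(fileI)

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE     = job.destinationSE
    fileOL.dataset           = job.destinationDBlock
    fileOL.type = 'log'
    job.addFile(fileOL)

    jobList.append(job)

s, o = Client.submitJobs(jobList)
print s
for x in o:
    print "PandaID=%s" % x[0]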
Example #7
import sys
import time
import commands

from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

aSrvID = None

for idx, argv in enumerate(sys.argv):
    if argv == '-s':
        aSrvID = sys.argv[idx + 1]
        sys.argv = sys.argv[:idx]
        break

site = sys.argv[1]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'local'

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = "%s" % commands.getoutput('uuidgen')
# MPI transform on Titan that will run actual job
job.transformation = '/lustre/atlas/proj-shared/csc108/panitkin/alicetest1/mpi_wrapper_alice_ppbench.py'

job.destinationDBlock = datasetName
job.destinationSE = destName
job.currentPriority = 1000
job.prodSourceLabel = 'panda'
job.computingSite = site
job.jobParameters = " "
job.VO = 'alice'

fileOL = FileSpec()
Example #8
    if argv == '-s':
        aSrvID = sys.argv[idx+1]
        sys.argv = sys.argv[:idx]
        break

#site = sys.argv[1]
site = 'ANALY_BNL-LSST'  #orig
#site = 'BNL-LSST'
#site = 'SWT2_CPB-LSST'
#site = 'UTA_SWT2-LSST'
#site = 'ANALY_SWT2_CPB-LSST'

datasetName = 'panda.user.jschovan.lsst.%s' % commands.getoutput('uuidgen')
destName    = None

job = JobSpec()
job.jobDefinitionID   = int(time.time()) % 10000
job.jobName           = "%s" % commands.getoutput('uuidgen')
### job.transformation    = 'http://www.usatlas.bnl.gov/~wenaus/lsst-trf/lsst-trf.sh'
#job.transformation    = 'http://pandawms.org/pandawms-jobcache/lsst-trf.sh'
job.transformation = 'http://pandawms.org/pandawms-jobcache/lsst-trf-phosim332.sh'
job.destinationDBlock = datasetName
#job.destinationSE     = destName
job.destinationSE     = 'local' 
job.currentPriority   = 1000
#job.prodSourceLabel   = 'ptest'
#job.prodSourceLabel = 'panda'
#job.prodSourceLabel = 'ptest'
#job.prodSourceLabel = 'test'
#job.prodSourceLabel = 'ptest'
### 2014-01-27
Example #9
import sys
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv)>1:
    site = sys.argv[1]
else:
    site = None

jobList = []
for i in range(2):
    datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
    destName    = 'ANALY_BNL_ATLAS_1'
    
    job = JobSpec()
    job.jobDefinitionID   = 1
    job.jobName           = commands.getoutput('uuidgen') 
    job.AtlasRelease      = 'Atlas-12.0.2'
    job.homepackage       = 'AnalysisTransforms'
    job.transformation    = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena2'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.currentPriority   = 3000
    job.prodSourceLabel   = 'user'
    job.computingSite     = site
    job.prodDBlock        = 'testIdeal_06.005001.pythia_minbias.recon.AOD.v12000103'
    
    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % commands.getoutput('uuidgen') 
    fileOL.destinationDBlock = job.destinationDBlock
Example #10
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName    = None

files = {
    'daq.ATLAS.0092045.physics.RPCwBeam.LB0016.SFO-2._0009.data':None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-14.4.0'
    job.homepackage       = 'AtlasTier0/14.4.0.2'
    job.transformation    = 'Reco_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.computingSite     = site
    job.prodDBlock        = 'data08_cos.00092045.physics_RPCwBeam.daq.RAW.o4_T1224560091' 
    
    job.prodSourceLabel   = 'test'
    job.processingType    = 'reprocessing'        
    job.currentPriority   = 10000
    job.cloud = cloud
    job.cmtConfig         = 'i686-slc4-gcc34-opt'
Example #11
 def run(self):
     try:
         _logger.debug('%s Start %s' % (self.pandaID,self.job.jobStatus))
         flagComplete    = True
         ddmJobs         = []
         topUserDsList   = []
         usingMerger     = False        
         disableNotifier = False
         firstIndvDS     = True
         finalStatusDS   = []
         for destinationDBlock in self.destinationDBlocks:
             dsList = []
             _logger.debug('%s start %s' % (self.pandaID,destinationDBlock))
             # ignore tid datasets
             if re.search('_tid[\d_]+$',destinationDBlock):
                 _logger.debug('%s skip %s' % (self.pandaID,destinationDBlock))                
                 continue
             # ignore HC datasets
             if re.search('^hc_test\.',destinationDBlock) != None or re.search('^user\.gangarbt\.',destinationDBlock) != None:
                 if re.search('_sub\d+$',destinationDBlock) == None and re.search('\.lib$',destinationDBlock) == None:
                     _logger.debug('%s skip HC %s' % (self.pandaID,destinationDBlock))                
                     continue
             # query dataset
             if destinationDBlock in self.datasetMap:
                 dataset = self.datasetMap[destinationDBlock]
             else:
                 dataset = self.taskBuffer.queryDatasetWithMap({'name':destinationDBlock})
             if dataset == None:
                 _logger.error('%s Not found : %s' % (self.pandaID,destinationDBlock))
                 flagComplete = False
                 continue
             # skip tobedeleted/tobeclosed 
             if dataset.status in ['cleanup','tobeclosed','completed']:
                 _logger.debug('%s skip %s due to %s' % (self.pandaID,destinationDBlock,dataset.status))
                 continue
             dsList.append(dataset)
             # sort
             dsList.sort()
             # count number of completed files
             notFinish = self.taskBuffer.countFilesWithMap({'destinationDBlock':destinationDBlock,
                                                            'status':'unknown'})
             if notFinish < 0:
                 _logger.error('%s Invalid DB return : %s' % (self.pandaID,notFinish))
                 flagComplete = False                
                 continue
             # check if completed
             _logger.debug('%s notFinish:%s' % (self.pandaID,notFinish))
             if self.job.destinationSE == 'local' and self.job.prodSourceLabel in ['user','panda']:
                 # close non-DQ2 destinationDBlock immediately
                 finalStatus = 'closed'
             elif self.job.lockedby == 'jedi' and self.isTopLevelDS(destinationDBlock):
                 # set it closed in order not to trigger DDM cleanup. It will be closed by JEDI
                 finalStatus = 'closed'
             elif self.job.prodSourceLabel in ['user'] and "--mergeOutput" in self.job.jobParameters \
                      and self.job.processingType != 'usermerge':
                 # merge output files
                 if firstIndvDS:
                     # set 'tobemerged' to only the first dataset to avoid triggering many Mergers for --individualOutDS
                     finalStatus = 'tobemerged'
                     firstIndvDS = False
                 else:
                     finalStatus = 'tobeclosed'
                 # set merging to top dataset
                 usingMerger = True
                 # disable Notifier
                 disableNotifier = True
             elif self.job.produceUnMerge():
                 finalStatus = 'doing'
             else:
                 # set status to 'tobeclosed' to trigger DQ2 closing
                 finalStatus = 'tobeclosed'
             if notFinish==0: 
                 _logger.debug('%s set %s to dataset : %s' % (self.pandaID,finalStatus,destinationDBlock))
                 # set status
                 dataset.status = finalStatus
                 # update dataset in DB
                 retT = self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                                       criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                 if len(retT) > 0 and retT[0]==1:
                     finalStatusDS += dsList
                     # close user datasets
                     if self.job.prodSourceLabel in ['user'] and self.job.destinationDBlock.endswith('/') \
                            and (dataset.name.startswith('user') or dataset.name.startswith('group')):
                         # get top-level user dataset 
                         topUserDsName = re.sub('_sub\d+$','',dataset.name)
                         # update if it is the first attempt
                         if topUserDsName != dataset.name and not topUserDsName in topUserDsList and self.job.lockedby != 'jedi':
                             topUserDs = self.taskBuffer.queryDatasetWithMap({'name':topUserDsName})
                             if topUserDs != None:
                                 # check status
                                 if topUserDs.status in ['completed','cleanup','tobeclosed',
                                                         'tobemerged','merging']:
                                     _logger.debug('%s skip %s due to status=%s' % (self.pandaID,topUserDsName,topUserDs.status))
                                 else:
                                     # set status
                                     if self.job.processingType.startswith('gangarobot') or \
                                            self.job.processingType.startswith('hammercloud'):
                                         # not trigger freezing for HC datasets so that files can be appended
                                         topUserDs.status = 'completed'
                                     elif not usingMerger:
                                         topUserDs.status = finalStatus
                                     else:
                                         topUserDs.status = 'merging'
                                     # append to avoid repetition
                                     topUserDsList.append(topUserDsName)
                                     # update DB
                                     retTopT = self.taskBuffer.updateDatasets([topUserDs],withLock=True,withCriteria="status<>:crStatus",
                                                                              criteriaMap={':crStatus':topUserDs.status})
                                     if len(retTopT) > 0 and retTopT[0]==1:
                                         _logger.debug('%s set %s to top dataset : %s' % (self.pandaID,topUserDs.status,topUserDsName))
                                     else:
                                         _logger.debug('%s failed to update top dataset : %s' % (self.pandaID,topUserDsName))
                         # get parent dataset for merge job
                         if self.job.processingType == 'usermerge':
                             tmpMatch = re.search('--parentDS ([^ \'\"]+)',self.job.jobParameters)
                             if tmpMatch == None:
                                 _logger.error('%s failed to extract parentDS' % self.pandaID)
                             else:
                                 unmergedDsName = tmpMatch.group(1)
                                 # update if it is the first attempt
                                 if not unmergedDsName in topUserDsList:
                                     unmergedDs = self.taskBuffer.queryDatasetWithMap({'name':unmergedDsName})
                                     if unmergedDs == None:
                                         _logger.error('%s failed to get parentDS=%s from DB' % (self.pandaID,unmergedDsName))
                                     else:
                                         # check status
                                         if unmergedDs.status in ['completed','cleanup','tobeclosed']:
                                             _logger.debug('%s skip %s due to status=%s' % (self.pandaID,unmergedDsName,unmergedDs.status))
                                         else:
                                             # set status
                                             unmergedDs.status = finalStatus
                                             # append to avoid repetition
                                             topUserDsList.append(unmergedDsName)
                                             # update DB
                                             retTopT = self.taskBuffer.updateDatasets([unmergedDs],withLock=True,withCriteria="status<>:crStatus",
                                                                                      criteriaMap={':crStatus':unmergedDs.status})
                                             if len(retTopT) > 0 and retTopT[0]==1:
                                                 _logger.debug('%s set %s to parent dataset : %s' % (self.pandaID,unmergedDs.status,unmergedDsName))
                                             else:
                                                 _logger.debug('%s failed to update parent dataset : %s' % (self.pandaID,unmergedDsName))
                     if self.pandaDDM and self.job.prodSourceLabel=='managed':
                         # instantiate SiteMapper
                         if self.siteMapper == None:
                             self.siteMapper = SiteMapper(self.taskBuffer)
                         # get file list for PandaDDM
                         retList = self.taskBuffer.queryFilesWithMap({'destinationDBlock':destinationDBlock})
                         lfnsStr = ''
                         guidStr = ''
                         for tmpFile in retList:
                             if tmpFile.type in ['log','output']:
                                 lfnsStr += '%s,' % tmpFile.lfn
                                 guidStr += '%s,' % tmpFile.GUID
                         if lfnsStr != '':
                             guidStr = guidStr[:-1]
                             lfnsStr = lfnsStr[:-1]
                             # create a DDM job
                             ddmjob = JobSpec()
                             ddmjob.jobDefinitionID   = int(time.time()) % 10000
                             ddmjob.jobName           = "%s" % commands.getoutput('uuidgen')
                             ddmjob.transformation    = 'http://pandaserver.cern.ch:25080/trf/mover/run_dq2_cr'
                             ddmjob.destinationDBlock = 'testpanda.%s' % ddmjob.jobName
                             ddmjob.computingSite     = "BNL_ATLAS_DDM"
                             ddmjob.destinationSE     = ddmjob.computingSite
                             ddmjob.currentPriority   = 200000
                             ddmjob.prodSourceLabel   = 'ddm'
                             ddmjob.transferType      = 'sub'
                             # append log file
                             fileOL = FileSpec()
                             fileOL.lfn = "%s.job.log.tgz" % ddmjob.jobName
                             fileOL.destinationDBlock = ddmjob.destinationDBlock
                             fileOL.destinationSE     = ddmjob.destinationSE
                             fileOL.dataset           = ddmjob.destinationDBlock
                             fileOL.type = 'log'
                             ddmjob.addFile(fileOL)
                             # make arguments
                             dstDQ2ID = 'BNLPANDA'
                             srcDQ2ID = self.siteMapper.getSite(self.job.computingSite).ddm
                             callBackURL = 'https://%s:%s/server/panda/datasetCompleted?vuid=%s&site=%s' % \
                                           (panda_config.pserverhost,panda_config.pserverport,
                                            dataset.vuid,dstDQ2ID)
                             _logger.debug(callBackURL)
                             # set src/dest
                             ddmjob.sourceSite      = srcDQ2ID
                             ddmjob.destinationSite = dstDQ2ID
                             # if src==dst, send callback without ddm job
                             if dstDQ2ID == srcDQ2ID:
                                 comout = commands.getoutput('curl -k %s' % callBackURL)
                                 _logger.debug(comout)
                             else:
                                 # run dq2_cr
                                 callBackURL = urllib.quote(callBackURL)
                                 # get destination dir
                                 destDir = brokerage.broker_util._getDefaultStorage(self.siteMapper.getSite(self.job.computingSite).dq2url)
                                 argStr = "-s %s -r %s --guids %s --lfns %s --callBack %s -d %s/%s %s" % \
                                          (srcDQ2ID,dstDQ2ID,guidStr,lfnsStr,callBackURL,destDir,
                                           destinationDBlock,destinationDBlock)
                                 # set job parameters
                                 ddmjob.jobParameters = argStr
                                 _logger.debug('%s pdq2_cr %s' % (self.pandaID,ddmjob.jobParameters))
                                 ddmJobs.append(ddmjob)
                     # start Activator
                     if re.search('_sub\d+$',dataset.name) == None:
                         if self.job.prodSourceLabel=='panda' and self.job.processingType in ['merge','unmerge']:
                             # don't trigger Activator for merge jobs
                             pass
                         else:
                             if self.job.jobStatus == 'finished':
                                 aThr = Activator(self.taskBuffer,dataset)
                                 aThr.start()
                                 aThr.join()
                 else:
                     # unset flag since another thread already updated 
                     #flagComplete = False
                     pass
             else:
                 # update dataset in DB
                 self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                                criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                 # unset flag
                 flagComplete = False
             # end
             _logger.debug('%s end %s' % (self.pandaID,destinationDBlock))
         # start DDM jobs
         if ddmJobs != []:
             self.taskBuffer.storeJobs(ddmJobs,self.job.prodUserID,joinThr=True)
         # change pending jobs to failed
         finalizedFlag = True
         if flagComplete and self.job.prodSourceLabel=='user':
             _logger.debug('%s finalize %s %s' % (self.pandaID,self.job.prodUserName,self.job.jobDefinitionID))
             finalizedFlag = self.taskBuffer.finalizePendingJobs(self.job.prodUserName,self.job.jobDefinitionID,waitLock=True)
             _logger.debug('%s finalized with %s' % (self.pandaID,finalizedFlag))
         # update unmerged datasets in JEDI to trigger merging
         if flagComplete and self.job.produceUnMerge() and finalStatusDS != []:
             if finalizedFlag:
                 self.taskBuffer.updateUnmergedDatasets(self.job,finalStatusDS)
         # start notifier
         _logger.debug('%s source:%s complete:%s' % (self.pandaID,self.job.prodSourceLabel,flagComplete))
         if (self.job.jobStatus != 'transferring') and ((flagComplete and self.job.prodSourceLabel=='user') or \
            (self.job.jobStatus=='failed' and self.job.prodSourceLabel=='panda')) and \
            self.job.lockedby != 'jedi':
             # don't send email for merge jobs
             if (not disableNotifier) and not self.job.processingType in ['merge','unmerge']:
                 useNotifier = True
                 summaryInfo = {}
                 # check all jobDefIDs in jobsetID
                 if not self.job.jobsetID in [0,None,'NULL']:
                     useNotifier,summaryInfo = self.taskBuffer.checkDatasetStatusForNotifier(self.job.jobsetID,self.job.jobDefinitionID,
                                                                                             self.job.prodUserName)
                     _logger.debug('%s useNotifier:%s' % (self.pandaID,useNotifier))
                 if useNotifier:
                     _logger.debug('%s start Notifier' % self.pandaID)
                     nThr = Notifier.Notifier(self.taskBuffer,self.job,self.destinationDBlocks,summaryInfo)
                     nThr.run()
                     _logger.debug('%s end Notifier' % self.pandaID)                    
         _logger.debug('%s End' % self.pandaID)
     except:
         errType,errValue = sys.exc_info()[:2]
         _logger.error("%s %s" % (errType,errValue))
Example #12
import sys
import time
import random
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv)>1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

index = 0

job = JobSpec()
job.jobDefinitionID   = int(time.time()) % 10000
job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
# newline-separated fields chain two steps (digi + reco) in a single job
job.AtlasRelease      = 'Atlas-14.1.0\nAtlas-14.1.0'
job.homepackage       = 'AtlasProduction/14.1.0.3\nAtlasProduction/14.1.0.3'
job.transformation    = 'csc_digi_trf.py\ncsc_reco_trf.py'
job.destinationDBlock = datasetName

job.computingSite     = site

job.prodDBlock        = 'valid1.005200.T1_McAtNlo_Jimmy.simul.HITS.e322_s429_tid022081'
    
job.prodSourceLabel   = 'test'    
job.currentPriority   = 10000
job.cloud             = 'US'
Example #13
    cloud      = sys.argv[2]
    prodDBlock = sys.argv[3]
    inputFile  = sys.argv[4]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile:None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-15.3.1'
    job.homepackage       = 'AtlasProduction/15.3.1.5'
    job.transformation    = 'csc_atlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite     = site
    job.prodDBlock        = prodDBlock
    
    job.prodSourceLabel   = 'test'
    job.processingType    = 'test'
    job.currentPriority   = 10000
    job.cloud             = cloud
    job.cmtConfig         = 'i686-slc4-gcc34-opt'
Example #14
 def run(self):
     try:
         self.putLog('start %s' % self.evpFileName)            
         # lock evp file
         self.evpFile = open(self.evpFileName)
         try:
             fcntl.flock(self.evpFile.fileno(),fcntl.LOCK_EX|fcntl.LOCK_NB)
         except:
             # release (another process holds the lock)
             self.putLog("cannot lock %s" % self.evpFileName)
             self.evpFile.close()
             return True
         # options
         runEvtList          = []
         eventPickDataType   = ''
         eventPickStreamName = ''
         eventPickDS         = []
         eventPickAmiTag     = ''
         eventPickNumSites   = 1
         inputFileList       = []
         tagDsList           = []
         tagQuery            = ''
         tagStreamRef        = ''
         skipDaTRI           = False
         runEvtGuidMap       = {}
         ei_api              = ''
         # read evp file
         for tmpLine in self.evpFile:
             tmpMatch = re.search('^([^=]+)=(.+)$',tmpLine)
             # check format
             if tmpMatch == None:
                 continue
             tmpItems = tmpMatch.groups()
             if tmpItems[0] == 'runEvent':
                 # get run and event number
                 tmpRunEvt = tmpItems[1].split(',')
                 if len(tmpRunEvt) == 2:
                     runEvtList.append(tmpRunEvt)
             elif tmpItems[0] == 'eventPickDataType':
                 # data type
                 eventPickDataType = tmpItems[1]
             elif tmpItems[0] == 'eventPickStreamName':
                 # stream name
                 eventPickStreamName = tmpItems[1]
             elif tmpItems[0] == 'eventPickDS':
                 # dataset pattern
                 eventPickDS = tmpItems[1].split(',')
             elif tmpItems[0] == 'eventPickAmiTag':
                 # AMI tag
                 eventPickAmiTag = tmpItems[1]
             elif tmpItems[0] == 'eventPickNumSites':
                 # the number of sites where datasets are distributed
                 try:
                     eventPickNumSites = int(tmpItems[1])
                 except:
                     pass
             elif tmpItems[0] == 'userName':
                 # user name
                 self.userDN = tmpItems[1]
                 self.putLog("user=%s" % self.userDN)
             elif tmpItems[0] == 'userTaskName':
                 # user task name
                 self.userTaskName = tmpItems[1]
             elif tmpItems[0] == 'userDatasetName':
                 # user dataset name
                 self.userDatasetName = tmpItems[1]
             elif tmpItems[0] == 'lockedBy':
                 # client name
                 self.lockedBy = tmpItems[1]
             elif tmpItems[0] == 'creationTime':
                 # creation time
                 self.creationTime = tmpItems[1]
             elif tmpItems[0] == 'params':
                 # parameters
                 self.params = tmpItems[1]
             elif tmpItems[0] == 'ei_api':
                 # ei api parameter for MC
                 ei_api = tmpItems[1]
             elif tmpItems[0] == 'inputFileList':
                 # input file list
                 inputFileList = tmpItems[1].split(',')
                 try:
                     inputFileList.remove('')
                 except:
                     pass
             elif tmpItems[0] == 'tagDS':
                 # TAG dataset
                 tagDsList = tmpItems[1].split(',')
             elif tmpItems[0] == 'tagQuery':
                 # query for TAG
                 tagQuery = tmpItems[1]
             elif tmpItems[0] == 'tagStreamRef':
                 # StreamRef for TAG
                 tagStreamRef = tmpItems[1]
                 if not tagStreamRef.endswith('_ref'):
                     tagStreamRef += '_ref'
             elif tmpItems[0] == 'runEvtGuidMap':
                 # GUIDs
                 try:
                     exec "runEvtGuidMap="+tmpItems[1]
                 except:
                     pass
         # extract task name
         if self.userTaskName == '' and self.params != '':
             try:
                 tmpMatch = re.search('--outDS(=| ) *([^ ]+)',self.params)
                 if tmpMatch != None:
                     self.userTaskName = tmpMatch.group(2)
                     if not self.userTaskName.endswith('/'):
                         self.userTaskName += '/'
             except:
                 pass
         # suppress DaTRI
         if self.params != '':
             if '--eventPickSkipDaTRI' in self.params:
                 skipDaTRI = True
         # get compact user name
         compactDN = self.taskBuffer.cleanUserID(self.userDN)
         # get jediTaskID
         self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI(compactDN,self.userTaskName)
         # convert 
         if tagDsList == [] or tagQuery == '':
             # convert run/event list to dataset/file list
             tmpRet,locationMap,allFiles = self.pd2p.convertEvtRunToDatasets(runEvtList,
                                                                             eventPickDataType,
                                                                             eventPickStreamName,
                                                                             eventPickDS,
                                                                             eventPickAmiTag,
                                                                             self.userDN,
                                                                             runEvtGuidMap,
                                                                             ei_api
                                                                             )
             if not tmpRet:
                 if 'isFatal' in locationMap and locationMap['isFatal'] == True:
                     self.ignoreError = False
                 self.endWithError('Failed to convert the run/event list to a dataset/file list')
                 return False
         else:
             # get parent dataset/files with TAG
             tmpRet,locationMap,allFiles = self.pd2p.getTagParentInfoUsingTagQuery(tagDsList,tagQuery,tagStreamRef) 
             if not tmpRet:
                 self.endWithError('Failed to get parent dataset/file list with TAG')
                 return False
         # use only files in the list
         if inputFileList != []:
             tmpAllFiles = []
             for tmpFile in allFiles:
                 if tmpFile['lfn'] in inputFileList:
                     tmpAllFiles.append(tmpFile)
             allFiles = tmpAllFiles        
         # remove redundant CN from DN
         tmpDN = self.userDN
         tmpDN = re.sub('/CN=limited proxy','',tmpDN)
         tmpDN = re.sub('(/CN=proxy)+$','',tmpDN)
         # make dataset container
         tmpRet = self.pd2p.registerDatasetContainerWithDatasets(self.userDatasetName,allFiles,
                                                                 locationMap,
                                                                 nSites=eventPickNumSites,
                                                                 owner=tmpDN)
         if not tmpRet:
             self.endWithError('Failed to make a dataset container %s' % self.userDatasetName)
             return False
         # skip DaTRI
         if skipDaTRI:
             # successfully terminated
             self.putLog("skip DaTRI")
             # update task
             self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID)
         else:
             # get candidates
             tmpRet,candidateMaps = self.pd2p.getCandidates(self.userDatasetName,checkUsedFile=False,
                                                            useHidden=True)
             if not tmpRet:
                 self.endWithError('Failed to find candidate for destination')
                 return False
             # collect all candidates
             allCandidates = [] 
             for tmpDS,tmpDsVal in candidateMaps.iteritems():
                 for tmpCloud,tmpCloudVal in tmpDsVal.iteritems():
                     for tmpSiteName in tmpCloudVal[0]:
                         if not tmpSiteName in allCandidates:
                             allCandidates.append(tmpSiteName)
             if allCandidates == []:
                 self.endWithError('No candidate for destination')
                 return False
             # get list of dataset (container) names
             if eventPickNumSites > 1:
                 # decompose container to transfer datasets separately
                 tmpRet,tmpOut = self.pd2p.getListDatasetReplicasInContainer(self.userDatasetName) 
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' % self.userDatasetName)
                     return False
                 userDatasetNameList = tmpOut.keys()
             else:
                 # transfer container at once
                 userDatasetNameList = [self.userDatasetName]
             # loop over all datasets
             sitesUsed = []
             for tmpUserDatasetName in userDatasetNameList:
                 # get size of dataset container
                 tmpRet,totalInputSize = rucioAPI.getDatasetSize(tmpUserDatasetName)
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' % tmpUserDatasetName)
                     return False
                 # run brokerage
                 tmpJob = JobSpec()
                 tmpJob.AtlasRelease = ''
                 self.putLog("run brokerage for %s" % tmpDS)
                 brokerage.broker.schedule([tmpJob],self.taskBuffer,self.siteMapper,True,allCandidates,
                                           True,datasetSize=totalInputSize)
                 if tmpJob.computingSite.startswith('ERROR'):
                     self.endWithError('brokerage failed with %s' % tmpJob.computingSite)
                     return False
                 self.putLog("site -> %s" % tmpJob.computingSite)
                 # send transfer request
                 try:
                     tmpDN = rucioAPI.parse_dn(tmpDN)
                     tmpStatus,userInfo = rucioAPI.finger(tmpDN)
                     if not tmpStatus:
                         raise RuntimeError,'user info not found for {0} with {1}'.format(tmpDN,userInfo)
                     tmpDN = userInfo['nickname']
                     tmpDQ2ID = self.siteMapper.getSite(tmpJob.computingSite).ddm_input
                     tmpMsg = "%s ds=%s site=%s id=%s" % ('registerDatasetLocation for DaTRI ',
                                                          tmpUserDatasetName,
                                                          tmpDQ2ID,
                                                          tmpDN)
                     self.putLog(tmpMsg)
                     rucioAPI.registerDatasetLocation(tmpUserDatasetName,[tmpDQ2ID],lifetime=14,owner=tmpDN,
                                                      activity="User Subscriptions")
                     self.putLog('OK')
                 except:
                     errType,errValue = sys.exc_info()[:2]
                     tmpStr = 'Failed to send transfer request : %s %s' % (errType,errValue)
                     tmpStr = tmpStr.strip()
                     tmpStr += traceback.format_exc()
                     self.endWithError(tmpStr)
                     return False
                 # list of sites already used
                 sitesUsed.append(tmpJob.computingSite)
                 self.putLog("used %s sites" % len(sitesUsed))
                 # set candidates
                 if len(sitesUsed) >= eventPickNumSites:
                     # reset candidates to limit the number of sites
                     allCandidates = sitesUsed
                     sitesUsed = []
                 else:
                     # remove site
                     allCandidates.remove(tmpJob.computingSite)
             # send email notification for success
             tmpMsg =  'A transfer request was successfully sent to Rucio.\n'
             tmpMsg += 'Your task will get started once transfer is completed.'
             self.sendEmail(True,tmpMsg)
         try:
             # unlock and delete evp file
             fcntl.flock(self.evpFile.fileno(),fcntl.LOCK_UN)
             self.evpFile.close()
             os.remove(self.evpFileName)
         except:
             pass
         # successfully terminated
         self.putLog("end %s" % self.evpFileName)
         return True
     except:
         errType,errValue = sys.exc_info()[:2]
         self.endWithError('Got exception %s:%s %s' % (errType,errValue,traceback.format_exc()))
         return False
Example #15
def main():
    logger.info('Getting tasks with status send and running')
    #    tasks_list = Task.objects.all().filter(Q(status='send') | Q(status='running'))
    tasks_list = Task.objects.all().filter(name='dvcs2016P09t2r13v1_mu+')
    logger.info('Got list of %s tasks' % len(tasks_list))

    for t in tasks_list:
        logger.info('Getting jobs in status defined or failed for task %s' % t)
        jobs_list_count = Job.objects.all().filter(task=t).count()
        if jobs_list_count > 50:
            jobs_list = Job.objects.all().filter(
                task=t).order_by('id')[:max_send_amount]
        else:
            jobs_list = Job.objects.all().filter(
                task=t).order_by('id')[:jobs_list_count]
        logger.info('Got list of %s jobs' % len(jobs_list))

        i = 0
        for j in jobs_list:
            if i >= max_send_amount:
                break

            logger.info('Going to send job %s of %s task' %
                        (j.file, j.task.name))

            umark = commands.getoutput('uuidgen')
            datasetName = 'panda.destDB.%s' % umark
            destName = 'COMPASSPRODDISK'  # PanDA will not try to move output data, data will be placed by pilot (based on schedconfig)
            TMPRAWFILE = j.file[j.file.rfind('/') + 1:]
            logger.info(TMPRAWFILE)
            TMPMDSTFILE = 'mDST-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.root' % {
                'input_file': j.file,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            logger.info(TMPMDSTFILE)
            TMPHISTFILE = '%(runNumber)s-%(runChunk)s-%(prodSlt)s.root' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(TMPHISTFILE)
            TMPRICHFILE = 'gfile_%(runNumber)s-%(runChunk)s.gfile' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(TMPRICHFILE)
            EVTDUMPFILE = 'evtdump%(prodSlt)s-%(runChunk)s-%(runNumber)s.raw' % {
                'prodSlt': j.task.prodslt,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(EVTDUMPFILE)
            STDOUTFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stdout' % {
                'prodNameOnly': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDOUTFILE)
            STDERRFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stderr' % {
                'prodNameOnly': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDERRFILE)
            try:
                file_year = j.file.split('/')[5]
                logger.info(file_year)
            except:
                logger.error('Error while splitting file to get year')
                sys.exit(1)

            ProdPathAndName = j.task.home + j.task.path + j.task.soft

            job = JobSpec()
            job.taskID = j.task.id
            job.jobDefinitionID = 0
            job.jobName = '%(prodName)s-%(fileYear)s--%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s' % {
                'prodName': j.task.soft,
                'fileYear': file_year,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            job.transformation = j.task.type  # payload (can be URL as well)
            job.destinationDBlock = datasetName
            job.destinationSE = destName
            job.currentPriority = 2000
            job.prodSourceLabel = 'prod_test'
            job.computingSite = site
            job.attemptNr = j.attempt + 1
            job.maxAttempt = j.task.max_attempts
            if j.status == 'failed':
                job.parentID = j.panda_id
            head, tail = os.path.split(j.file)
            #            job.transferType = 'direct'
            job.sourceSite = 'CERN_COMPASS_PROD'

            # logs, and all files generated during execution will be placed in log (except output file)
            #job.jobParameters='source /afs/cern.ch/project/eos/installation/compass/etc/setup.sh;export EOS_MGM_URL=root://eoscompass.cern.ch;export PATH=/afs/cern.ch/project/eos/installation/compass/bin:$PATH;ppwd=$(pwd);echo $ppwd;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;coralpath=%(ProdPathAndName)s/coral;echo $coralpath;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";echo $coralpathsetup;source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/template.opt;xrdcp -np $ppwd/%(TMPMDSTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/mDST/%(TMPMDSTFILE)s;xrdcp -np $ppwd/%(TMPHISTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/histos/%(TMPHISTFILE)s;metadataxml=$(ls metadata-*);echo $metadataxml;cp $metadataxml $metadataxml.PAYLOAD;' % {'TMPMDSTFILE': TMPMDSTFILE, 'TMPHISTFILE': TMPHISTFILE, 'TMPRICHFILE': TMPRICHFILE, 'input_file': input_file, 'ProdPathAndName': ProdPathAndName, 'prodName': prodName}
            job.jobParameters = 'export EOS_MGM_URL=root://eoscompass.cern.ch;ppwd=$(pwd);export COMPASS_SW_PREFIX=/eos/experiment/compass/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;xrdcp -N -f root://castorpublic.cern.ch/%(input_file)s\?svcClass=compasscdr .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/%(template)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;rm %(tail)s' % {
                'TMPRAWFILE': TMPRAWFILE,
                'TMPMDSTFILE': TMPMDSTFILE,
                'TMPHISTFILE': TMPHISTFILE,
                'TMPRICHFILE': TMPRICHFILE,
                'input_file': j.file,
                'ProdPathAndName': ProdPathAndName,
                'prodPath': j.task.path,
                'prodName': j.task.soft,
                'template': j.task.template,
                'tail': tail,
                'prodSlt': j.task.prodslt,
                'EVTDUMPFILE': EVTDUMPFILE,
                'STDOUTFILE': STDOUTFILE,
                'STDERRFILE': STDERRFILE
            }

            fileIRaw = FileSpec()
            fileIRaw.lfn = "%s" % (j.file)
            fileIRaw.GUID = '5874a461-61d3-4543-8f34-6fd7a4624e78'
            fileIRaw.fsize = 1073753368
            fileIRaw.checksum = '671608be'
            fileIRaw.destinationDBlock = job.destinationDBlock
            fileIRaw.destinationSE = job.destinationSE
            fileIRaw.dataset = job.destinationDBlock
            fileIRaw.type = 'input'
            job.addFile(fileIRaw)

            fileOstdout = FileSpec()
            fileOstdout.lfn = "payload_stdout.txt"
            fileOstdout.destinationDBlock = job.destinationDBlock
            fileOstdout.destinationSE = job.destinationSE
            fileOstdout.dataset = job.destinationDBlock
            fileOstdout.type = 'output'
            job.addFile(fileOstdout)

            fileOstderr = FileSpec()
            fileOstderr.lfn = "payload_stderr.txt"
            fileOstderr.destinationDBlock = job.destinationDBlock
            fileOstderr.destinationSE = job.destinationSE
            fileOstderr.dataset = job.destinationDBlock
            fileOstderr.type = 'output'
            job.addFile(fileOstderr)

            fileOLog = FileSpec()
            fileOLog.lfn = "%(prodName)s-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.job.log.tgz" % {
                'prodName': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            fileOLog.destinationDBlock = job.destinationDBlock
            fileOLog.destinationSE = job.destinationSE
            fileOLog.dataset = job.destinationDBlock
            fileOLog.type = 'log'
            job.addFile(fileOLog)

            fileOmDST = FileSpec()
            fileOmDST.lfn = "%s" % (TMPMDSTFILE)
            fileOmDST.destinationDBlock = job.destinationDBlock
            fileOmDST.destinationSE = job.destinationSE
            fileOmDST.dataset = job.destinationDBlock
            fileOmDST.type = 'output'
            job.addFile(fileOmDST)

            fileOTrafdic = FileSpec()
            fileOTrafdic.lfn = "%s" % (TMPHISTFILE)
            fileOTrafdic.destinationDBlock = job.destinationDBlock
            fileOTrafdic.destinationSE = job.destinationSE
            fileOTrafdic.dataset = job.destinationDBlock
            fileOTrafdic.type = 'output'
            job.addFile(fileOTrafdic)

            fileOtestevtdump = FileSpec()
            fileOtestevtdump.lfn = "testevtdump.raw"
            fileOtestevtdump.destinationDBlock = job.destinationDBlock
            fileOtestevtdump.destinationSE = job.destinationSE
            fileOtestevtdump.dataset = job.destinationDBlock
            fileOtestevtdump.type = 'output'
            job.addFile(fileOtestevtdump)

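            # Client.submitJobs returns a status code plus one result tuple per job;
            # x[0] of each tuple is the assigned PandaID (cf. the bookkeeping loop
            # commented out below)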
            s, o = Client.submitJobs([job], srvID=aSrvID)
            logger.info(s)
            logger.info(o)
            #             for x in o:
            #                 logger.info("PandaID=%s" % x[0])
            #                 today = datetime.datetime.today()
            #
            #                 if x[0] != 0 and x[0] != 'NULL':
            #                     j_update = Job.objects.get(id=j.id)
            #                     j_update.panda_id = x[0]
            #                     j_update.status = 'sent'
            #                     j_update.attempt = j_update.attempt + 1
            #                     j_update.date_updated = today
            #
            #                     try:
            #                         j_update.save()
            #                         logger.info('Job %s with PandaID %s updated at %s' % (j.id, x[0], today))
            #                     except IntegrityError as e:
            #                         logger.exception('unique_together violation caught, was not saved')
            #                     except DatabaseError as e:
            #                         logger.exception('Something went wrong while saving: %s' % e.message)
            #                 else:
            #                     logger.info('Job %s was not added to PanDA' % j.id)
            i += 1

    logger.info('done')
Ejemplo n.º 16
0
def main():
    logger.info('Getting tasks with status send and running')
    tasks_list = Task.objects.all().filter(
        Q(status='send') | Q(status='running'))
    #tasks_list = Task.objects.all().filter(name='dvcs2017align7_mu-')
    logger.info('Got list of %s tasks' % len(tasks_list))

    cdbServerArr = ['compassvm23.cern.ch', 'compassvm24.cern.ch']
    cdbServer = cdbServerArr[0]

    for t in tasks_list:
        max_send_amount = 1000

        logger.info('Getting jobs in status staged or failed for task %s' % t)
        jobs_list = Job.objects.all().filter(task=t).filter(
            attempt__lt=t.max_attempts).filter(
                Q(status='staged') | Q(status='failed')).order_by(
                    '-number_of_events')[:max_send_amount]
        logger.info('Got list of %s jobs' % len(jobs_list))

        #    jobs_list = Job.objects.all().filter(task=t).filter(file='/castor/cern.ch/compass/data/2017/raw/W04/cdr12116-278485.raw')

        i = 0
        for j in jobs_list:
            if j.attempt >= j.task.max_attempts:
                logger.info(
                    'Maximum number of retry attempts reached for job %s of task %s'
                    % (j.file, j.task.name))
                continue

            if i >= max_send_amount:
                break

            logger.info('Job %s of %s' % (i, max_send_amount))
            logger.info('Going to send job %s of task %s' %
                        (j.file, j.task.name))

            umark = commands.getoutput('uuidgen')
            datasetName = 'panda.destDB.%s' % umark
            destName = 'local'  # PanDA will not try to move output data, data will be placed by pilot (based on schedconfig)
            TMPRAWFILE = j.file[j.file.rfind('/') + 1:]
            logger.info(TMPRAWFILE)
            TMPMDSTFILE = 'mDST-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.root' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            logger.info(TMPMDSTFILE)
            TMPHISTFILE = '%(runNumber)s-%(runChunk)s-%(prodSlt)s.root' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(TMPHISTFILE)
            TMPRICHFILE = 'gfile_%(runNumber)s-%(runChunk)s.gfile' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(TMPRICHFILE)
            EVTDUMPFILE = 'evtdump%(prodSlt)s-%(runChunk)s-%(runNumber)s.raw' % {
                'prodSlt': j.task.prodslt,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(EVTDUMPFILE)
            STDOUTFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stdout' % {
                'prodNameOnly': j.task.production,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDOUTFILE)
            STDERRFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stderr' % {
                'prodNameOnly': j.task.production,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDERRFILE)
            PRODSOFT = j.task.soft
            logger.info(PRODSOFT)

            ProdPathAndName = j.task.home + j.task.path + j.task.soft

            job = JobSpec()
            job.VO = 'vo.compass.cern.ch'
            job.taskID = j.task.id
            job.jobDefinitionID = 0
            job.jobName = '%(prodName)s-%(fileYear)s--%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s' % {
                'prodName': j.task.production,
                'fileYear': j.task.year,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            job.transformation = j.task.type  # payload (can be URL as well)
            job.destinationDBlock = datasetName
            job.destinationSE = destName
            job.currentPriority = 2000
            if j.task.type == 'DDD filtering':
                job.currentPriority = 1000
            job.prodSourceLabel = 'prod_test'
            job.computingSite = j.task.site
            job.attemptNr = j.attempt + 1
            job.maxAttempt = j.task.max_attempts
            if j.status == 'failed':
                job.parentID = j.panda_id
            head, tail = os.path.split(j.file)

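            # pick a conditions-DB server at random to spread load across the hosts
            # listed in cdbServerArr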
            cdbServer = random.choice(cdbServerArr)

            # logs and all files generated during execution will be placed in the log tarball (except the output files)
            #job.jobParameters='source /afs/cern.ch/project/eos/installation/compass/etc/setup.sh;export EOS_MGM_URL=root://eoscompass.cern.ch;export PATH=/afs/cern.ch/project/eos/installation/compass/bin:$PATH;ppwd=$(pwd);echo $ppwd;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;coralpath=%(ProdPathAndName)s/coral;echo $coralpath;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";echo $coralpathsetup;source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/template.opt;xrdcp -np $ppwd/%(TMPMDSTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/mDST/%(TMPMDSTFILE)s;xrdcp -np $ppwd/%(TMPHISTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/histos/%(TMPHISTFILE)s;metadataxml=$(ls metadata-*);echo $metadataxml;cp $metadataxml $metadataxml.PAYLOAD;' % {'TMPMDSTFILE': TMPMDSTFILE, 'TMPHISTFILE': TMPHISTFILE, 'TMPRICHFILE': TMPRICHFILE, 'input_file': input_file, 'ProdPathAndName': ProdPathAndName, 'prodName': prodName}
            if j.task.type in ('test production', 'mass production', 'technical production'):
                if j.task.site == 'BW_COMPASS_MCORE':
                    job.jobParameters = 'ppwd=$(pwd);export COMPASS_SW_PREFIX=/scratch/sciteam/criedl/projectdata/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;export PRODSOFT=%(PRODSOFT)s;cp %(input_file)s .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/%(template)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;gzip payload_stderr.out;gzip payload_stdout.out;rm %(tail)s' % {
                        'TMPRAWFILE': TMPRAWFILE,
                        'TMPMDSTFILE': TMPMDSTFILE,
                        'TMPHISTFILE': TMPHISTFILE,
                        'TMPRICHFILE': TMPRICHFILE,
                        'PRODSOFT': PRODSOFT,
                        'input_file': j.file,
                        'ProdPathAndName': ProdPathAndName,
                        'prodPath': j.task.path,
                        'prodName': j.task.production,
                        'template': j.task.template,
                        'tail': tail,
                        'prodSlt': j.task.prodslt,
                        'EVTDUMPFILE': EVTDUMPFILE,
                        'STDOUTFILE': STDOUTFILE,
                        'STDERRFILE': STDERRFILE
                    }
                else:
                    job.jobParameters = 'export EOS_MGM_URL=root://eoscompass.cern.ch;ppwd=$(pwd);export COMPASS_SW_PREFIX=/eos/experiment/compass/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;export PRODSOFT=%(PRODSOFT)s;xrdcp -N -f root://castorpublic.cern.ch/%(input_file)s\?svcClass=compasscdr .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;export CDBSERVER=%(cdbServer)s;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/%(template)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;gzip payload_stderr.out;gzip payload_stdout.out;rm %(tail)s' % {
                        'TMPRAWFILE': TMPRAWFILE,
                        'TMPMDSTFILE': TMPMDSTFILE,
                        'TMPHISTFILE': TMPHISTFILE,
                        'TMPRICHFILE': TMPRICHFILE,
                        'PRODSOFT': PRODSOFT,
                        'input_file': j.file,
                        'ProdPathAndName': ProdPathAndName,
                        'prodPath': j.task.path,
                        'prodName': j.task.production,
                        'template': j.task.template,
                        'tail': tail,
                        'prodSlt': j.task.prodslt,
                        'EVTDUMPFILE': EVTDUMPFILE,
                        'STDOUTFILE': STDOUTFILE,
                        'STDERRFILE': STDERRFILE,
                        'cdbServer': cdbServer
                    }
            if j.task.type == 'DDD filtering':
                job.jobParameters = 'export EOS_MGM_URL=root://eoscompass.cern.ch;ppwd=$(pwd);export COMPASS_SW_PREFIX=/eos/experiment/compass/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;export PRODSOFT=%(PRODSOFT)s;xrdcp -N -f root://castorpublic.cern.ch/%(input_file)s\?svcClass=compasscdr .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;$CORAL/src/DaqDataDecoding/examples/how-to/ddd --filter-CAL --out=testevtdump.raw %(TMPRAWFILE)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;gzip payload_stderr.out;gzip payload_stdout.out;rm %(tail)s' % {
                    'TMPRAWFILE': TMPRAWFILE,
                    'TMPMDSTFILE': TMPMDSTFILE,
                    'TMPHISTFILE': TMPHISTFILE,
                    'TMPRICHFILE': TMPRICHFILE,
                    'PRODSOFT': PRODSOFT,
                    'input_file': j.file,
                    'ProdPathAndName': ProdPathAndName,
                    'prodPath': j.task.path,
                    'prodName': j.task.production,
                    'template': j.task.template,
                    'tail': tail,
                    'prodSlt': j.task.prodslt,
                    'EVTDUMPFILE': EVTDUMPFILE,
                    'STDOUTFILE': STDOUTFILE,
                    'STDERRFILE': STDERRFILE
                }

    #     fileIRaw = FileSpec()
    #     fileIRaw.lfn = "%s" % (input_file)
    #     fileIRaw.destinationDBlock = job.destinationDBlock
    #     fileIRaw.destinationSE     = job.destinationSE
    #     fileIRaw.dataset           = job.destinationDBlock
    #     fileIRaw.type = 'input'
    #     job.addFile(fileIRaw)

            fileOstdout = FileSpec()
            fileOstdout.lfn = "payload_stdout.out.gz"
            fileOstdout.destinationDBlock = job.destinationDBlock
            fileOstdout.destinationSE = job.destinationSE
            fileOstdout.dataset = job.destinationDBlock
            fileOstdout.type = 'output'
            job.addFile(fileOstdout)

            fileOstderr = FileSpec()
            fileOstderr.lfn = "payload_stderr.out.gz"
            fileOstderr.destinationDBlock = job.destinationDBlock
            fileOstderr.destinationSE = job.destinationSE
            fileOstderr.dataset = job.destinationDBlock
            fileOstderr.type = 'output'
            job.addFile(fileOstderr)

            fileOLog = FileSpec()
            fileOLog.lfn = "%(prodName)s-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.job.log.tgz" % {
                'prodName': j.task.production,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            fileOLog.destinationDBlock = job.destinationDBlock
            fileOLog.destinationSE = job.destinationSE
            fileOLog.dataset = job.destinationDBlock
            fileOLog.type = 'log'
            job.addFile(fileOLog)

            if j.task.type in ('test production', 'mass production', 'technical production'):
                fileOmDST = FileSpec()
                fileOmDST.lfn = "%s" % (TMPMDSTFILE)
                fileOmDST.destinationDBlock = job.destinationDBlock
                fileOmDST.destinationSE = job.destinationSE
                fileOmDST.dataset = job.destinationDBlock
                fileOmDST.type = 'output'
                job.addFile(fileOmDST)

                fileOTrafdic = FileSpec()
                fileOTrafdic.lfn = "%s" % (TMPHISTFILE)
                fileOTrafdic.destinationDBlock = job.destinationDBlock
                fileOTrafdic.destinationSE = job.destinationSE
                fileOTrafdic.dataset = job.destinationDBlock
                fileOTrafdic.type = 'output'
                job.addFile(fileOTrafdic)

            if j.task.type in ('test production', 'mass production', 'technical production', 'DDD filtering'):
                fileOtestevtdump = FileSpec()
                fileOtestevtdump.lfn = "testevtdump.raw"
                fileOtestevtdump.destinationDBlock = job.destinationDBlock
                fileOtestevtdump.destinationSE = job.destinationSE
                fileOtestevtdump.dataset = job.destinationDBlock
                fileOtestevtdump.type = 'output'
                job.addFile(fileOtestevtdump)

            s, o = Client.submitJobs([job], srvID=aSrvID)
            logger.info(s)
            for x in o:
                logger.info("PandaID=%s" % x[0])
                if x[0] != 0 and x[0] != 'NULL':
                    j_update = Job.objects.get(id=j.id)
                    j_update.panda_id = x[0]
                    j_update.status = 'sent'
                    j_update.attempt = j_update.attempt + 1
                    j_update.date_updated = timezone.now()

                    try:
                        j_update.save()
                        logger.info('Job %s with PandaID %s updated at %s' %
                                    (j.id, x[0], timezone.now()))

                        if j_update.task.status == 'send':
                            logger.info(
                                'Going to update status of task %s from send to running'
                                % j_update.task.name)
                            t_update = Task.objects.get(id=j_update.task.id)
                            t_update.status = 'running'
                            t_update.date_updated = timezone.now()

                            try:
                                t_update.save()
                                logger.info('Task %s updated' % t_update.name)
                            except IntegrityError as e:
                                logger.exception(
                                    'unique_together violation caught, task was not saved')
                            except DatabaseError as e:
                                logger.exception(
                                    'Something went wrong while saving: %s' %
                                    e.message)

                    except IntegrityError as e:
                        logger.exception(
                            'unique_together violation caught, job was not saved')
                    except DatabaseError as e:
                        logger.exception(
                            'Something went wrong while saving: %s' %
                            e.message)
                else:
                    logger.info('Job %s was not added to PanDA' % j.id)
            i += 1

    logger.info('done')
Ejemplo n.º 17
0
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = commands.getoutput('/usr/bin/uuidgen')
job.AtlasRelease = 'Atlas-9.0.4'
job.prodDBlock = 'pandatest.000003.dd.input'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput(
    '/usr/bin/uuidgen')
job.destinationSE = 'BNL_SE'

ids = {
    'pandatest.000003.dd.input._00028.junk':
    '6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27',
    'pandatest.000003.dd.input._00033.junk':
    '98f79ba1-1793-4253-aac7-bdf90a51d1ee',
    'pandatest.000003.dd.input._00039.junk':
    '33660dd5-7cef-422a-a7fc-6c24cb10deb1'
}
for lfn in ids.keys():
    file = FileSpec()
    file.lfn = lfn
    file.GUID = ids[file.lfn]
    file.dataset = 'pandatest.000003.dd.input'
    file.type = 'input'
    job.addFile(file)
Ejemplo n.º 18
0
    def master_prepare(self, app, appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('ExecutablePandaRTHandler master_prepare called for %s',
                     job.getFQID('.'))

        # set chirp variables
        if configPanda['chirpconfig'] or configPanda['chirpserver']:
            setChirpVariables()

#       Pack inputsandbox
        inputsandbox = 'sources.%s.tar' % commands.getoutput(
            'uuidgen 2> /dev/null')
        inpw = job.getInputWorkspace()
        # add user script to inputsandbox
        if hasattr(job.application.exe, "name"):
            if job.application.exe not in job.inputsandbox:
                job.inputsandbox.append(job.application.exe)

        for fname in [f.name for f in job.inputsandbox]:
            fname = fname.rstrip(os.sep)
            path = fname[:fname.rfind(os.sep)]
            f = fname[fname.rfind(os.sep) + 1:]
            rc, output = commands.getstatusoutput(
                'tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
        if len(job.inputsandbox) > 0:
            rc, output = commands.getstatusoutput('gzip %s' %
                                                  (inpw.getPath(inputsandbox)))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
            inputsandbox += ".gz"
        else:
            inputsandbox = None

#       Upload Inputsandbox
        if inputsandbox:
            logger.debug('Uploading source tarball ...')
            uploadSources(inpw.getPath(), os.path.basename(inputsandbox))
            self.inputsandbox = inputsandbox
        else:
            self.inputsandbox = None

#       input dataset
        if job.inputdata:
            if job.inputdata._name != 'DQ2Dataset':
                raise ApplicationConfigurationError(
                    None, 'PANDA application supports only DQ2Datasets')

        # run brokerage here if not splitting
        if not job.splitter:
            from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
            runPandaBrokerage(job)
        elif job.splitter._name not in [
                'DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask'
        ]:
            raise ApplicationConfigurationError(
                None, 'Panda splitter must be DQ2JobSplitter or ArgSplitter')

        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(
                None, 'site is still AUTO after brokerage!')

#       output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError(
                    None, 'Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        job.outputdata.datasetname, outlfn = dq2outputdatasetname(
            job.outputdata.datasetname, job.id, job.outputdata.isGroupDS,
            job.outputdata.groupname)

        self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

        try:
            Client.addDataset(job.outputdata.datasetname,
                              False,
                              location=self.outDsLocation)
            logger.info('Output dataset %s registered at %s' %
                        (job.outputdata.datasetname, self.outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname,
                                     location=self.outDsLocation)
        except exceptions.SystemExit:
            raise BackendError(
                'Panda', 'Exception in Client.addDataset %s: %s %s' %
                (job.outputdata.datasetname, sys.exc_info()[0],
                 sys.exc_info()[1]))

        # handle the libds
        if job.backend.libds:
            self.libDataset = job.backend.libds
            self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
            self.library = self.fileBO.lfn
        elif job.backend.bexec:
            self.libDataset = job.outputdata.datasetname + '.lib'
            self.library = '%s.tgz' % self.libDataset
            try:
                Client.addDataset(self.libDataset,
                                  False,
                                  location=self.outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset,
                                         location=self.outDsLocation)
                logger.info('Lib dataset %s registered at %s' %
                            (self.libDataset, self.outDsLocation))
            except exceptions.SystemExit:
                raise BackendError(
                    'Panda', 'Exception in Client.addDataset %s: %s %s' %
                    (self.libDataset, sys.exc_info()[0], sys.exc_info()[1]))

        # collect extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.outputsandbox:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # create build job
        if job.backend.bexec != '':
            jspec = JobSpec()
            jspec.jobDefinitionID = job.id
            jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
            jspec.transformation = '%s/buildGen-00-00-01' % Client.baseURLSUB
            if Client.isDQ2free(job.backend.site):
                jspec.destinationDBlock = '%s/%s' % (
                    job.outputdata.datasetname, self.libDataset)
                jspec.destinationSE = 'local'
            else:
                jspec.destinationDBlock = self.libDataset
                jspec.destinationSE = job.backend.site
            jspec.prodSourceLabel = configPanda['prodSourceLabelBuild']
            jspec.processingType = configPanda['processingType']
            jspec.assignedPriority = configPanda['assignedPriorityBuild']
            jspec.computingSite = job.backend.site
            jspec.cloud = job.backend.requirements.cloud
            jspec.jobParameters = '-o %s' % (self.library)
            if self.inputsandbox:
                jspec.jobParameters += ' -i %s' % (self.inputsandbox)
            else:
                raise ApplicationConfigurationError(
                    None,
                    'Executable on Panda with build job defined, but inputsandbox is empty!'
                )
            matchURL = re.search('(http.*://[^/]+)/', Client.baseURLCSRVSSL)
            if matchURL:
                jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1)
            if job.backend.bexec != '':
                jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(
                    job.backend.bexec)
                jspec.jobParameters += ' -r %s ' % '.'

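            # register the library tarball produced by the build job as its output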
            fout = FileSpec()
            fout.lfn = self.library
            fout.type = 'output'
            fout.dataset = self.libDataset
            fout.destinationDBlock = self.libDataset
            jspec.addFile(fout)

            flog = FileSpec()
            flog.lfn = '%s.log.tgz' % self.libDataset
            flog.type = 'log'
            flog.dataset = self.libDataset
            flog.destinationDBlock = self.libDataset
            jspec.addFile(flog)
            return jspec
        else:
            return None
Ejemplo n.º 19
0
import sys
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

aSrvID = None

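# optional '-s <srvID>' argument selects a specific PanDA server instance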
for idx,argv in enumerate(sys.argv):
    if argv == '-s':
        aSrvID = sys.argv[idx+1]
        sys.argv = sys.argv[:idx]
        break

site = sys.argv[1]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName    = None

job = JobSpec()
job.jobDefinitionID   = int(time.time()) % 10000
job.jobName           = "%s" % commands.getoutput('uuidgen')
job.transformation    = 'https://atlpan.web.cern.ch/atlpan/test.sh'
job.destinationDBlock = datasetName
job.destinationSE     = destName
job.currentPriority   = 1000
job.prodSourceLabel   = 'test'
job.computingSite     = site

job.jobParameters="aaaaa"

fileOL = FileSpec()
fileOL.lfn = "%s.job.log.tgz" % job.jobName
fileOL.destinationDBlock = job.destinationDBlock
fileOL.destinationSE     = job.destinationSE
Ejemplo n.º 20
0
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

job = JobSpec()
job.jobDefinitionID   = int(time.time()) % 10000
job.jobName           = commands.getoutput('/usr/bin/uuidgen') 
job.AtlasRelease      = 'Atlas-9.0.4'
job.prodDBlock        = 'pandatest.000003.dd.input'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('/usr/bin/uuidgen')
job.destinationSE     = 'BNL_SE'

ids = {'pandatest.000003.dd.input._00028.junk':'6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27',
       'pandatest.000003.dd.input._00033.junk':'98f79ba1-1793-4253-aac7-bdf90a51d1ee',
       'pandatest.000003.dd.input._00039.junk':'33660dd5-7cef-422a-a7fc-6c24cb10deb1'}
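# attach each test input file to the job by LFN and GUID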
for lfn in ids.keys():
    file = FileSpec()
    file.lfn = lfn
    file.GUID = ids[file.lfn]
    file.dataset = 'pandatest.000003.dd.input'
    file.type = 'input'
    job.addFile(file)

s,o = Client.submitJobs([job])
print "---------------------"
print s
print o
print "---------------------"
s,o = Client.getJobStatus([4934, 4766, 4767, 4768, 4769])
Ejemplo n.º 21
0
    cloud = sys.argv[2]
    prodDBlock = sys.argv[3]
    inputFile = sys.argv[4]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile: None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-16.6.2'
    job.homepackage = 'AtlasProduction/16.6.2.1'
    job.transformation = 'AtlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite = site
    job.prodDBlock = prodDBlock

    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc5-gcc43-opt'
Ejemplo n.º 22
0
import sys
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

jobList = []
for i in range(2):
    datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
    destName = 'ANALY_BNL_ATLAS_1'

    job = JobSpec()
    job.jobDefinitionID = 1
    job.jobName = commands.getoutput('uuidgen')
    job.AtlasRelease = 'Atlas-12.0.2'
    job.homepackage = 'AnalysisTransforms'
    job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena2'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 3000
    job.prodSourceLabel = 'user'
    job.computingSite = site
    job.prodDBlock = 'testIdeal_06.005001.pythia_minbias.recon.AOD.v12000103'

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % commands.getoutput('uuidgen')
    fileOL.destinationDBlock = job.destinationDBlock
Ejemplo n.º 23
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the specific aspec of each subjob.
           Returns: subjobconfig list of objects understood by backends."""

        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
        from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs

        # make sure we have the correct siteType
        refreshPandaSpecs()

        job = app._getParent()
        masterjob = job._getRoot()

        logger.debug('ProdTransPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        job.backend.actualCE = job.backend.site
        job.backend.requirements.cloud = Client.PandaSites[
            job.backend.site]['cloud']

        # check that the site is in a submit-able status
        if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
            allowed_sites = job.backend.list_ddm_sites()

        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
            tmpDsExist = False
            if (configPanda['processingType'].startswith('gangarobot') or
                    configPanda['processingType'].startswith('hammercloud')):
                #if Client.getDatasets(job.outputdata.datasetname):
                if getDatasets(job.outputdata.datasetname):
                    tmpDsExist = True
                    logger.info('Re-using output dataset %s' %
                                job.outputdata.datasetname)
            if (configPanda['specialHandling'] != 'ddm:rucio'
                    and not configPanda['processingType'].startswith('gangarobot')
                    and not configPanda['processingType'].startswith('hammercloud')
                    and not configPanda['processingType'].startswith('rucio_test')):
                Client.addDataset(job.outputdata.datasetname,
                                  False,
                                  location=outDsLocation,
                                  allowProdDisk=True,
                                  dsExist=tmpDsExist)
            logger.info('Output dataset %s registered at %s' %
                        (job.outputdata.datasetname, outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
        except exceptions.SystemExit:
            raise BackendError(
                'Panda', 'Exception in adding dataset %s: %s %s' %
                (job.outputdata.datasetname, sys.exc_info()[0],
                 sys.exc_info()[1]))

        # JobSpec.
        jspec = JobSpec()
        jspec.currentPriority = app.priority
        jspec.jobDefinitionID = masterjob.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.coreCount = app.core_count
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
        jspec.homepackage = app.home_package
        jspec.transformation = app.transformation
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = job.backend.site
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        if app.prod_source_label:
            jspec.prodSourceLabel = app.prod_source_label
        else:
            jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.specialHandling = configPanda['specialHandling']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.cmtConfig = app.atlas_cmtconfig
        if app.dbrelease == 'LATEST':
            try:
                latest_dbrelease = getLatestDBReleaseCaching()
            except Exception:
                from pandatools import Client
                latest_dbrelease = Client.getLatestDBRelease()
            m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
            if m:
                self.dbrelease_dataset = m.group(1)
                self.dbrelease = m.group(2)
            else:
                raise ApplicationConfigurationError(
                    None,
                    "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually."
                )
        else:
            self.dbrelease_dataset = app.dbrelease_dataset
            self.dbrelease = app.dbrelease
        jspec.jobParameters = app.job_parameters

        if self.dbrelease:
            if self.dbrelease == 'current':
                jspec.jobParameters += ' --DBRelease=current'
            else:
                if jspec.transformation.endswith(
                        "_tf.py") or jspec.transformation.endswith("_tf"):
                    jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (
                        self.dbrelease, )
                else:
                    jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (
                        self.dbrelease, )
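                # ship the DBRelease tarball as an input file so it is staged in
                # alongside the job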
                dbspec = FileSpec()
                dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
                dbspec.dataset = self.dbrelease_dataset
                dbspec.prodDBlock = jspec.prodDBlock
                dbspec.type = 'input'
                jspec.addFile(dbspec)

        if job.inputdata:
            m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)',
                          job.inputdata.dataset[0])
            if not m:
                logger.error("Error retrieving run number from dataset name")
                #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
                runnumber = 105200
            else:
                runnumber = int(m.group(2))
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --runNumber %d' % runnumber
            else:
                jspec.jobParameters += ' RunNumber=%d' % runnumber

        # Output files.
        randomized_lfns = []
        ilfn = 0
        for lfn, lfntype in zip(app.output_files, app.output_type):
            ofspec = FileSpec()
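            # append site, timestamp, and a uuid fragment so output LFNs stay unique
            # across sites and resubmissions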
            if app.randomize_lfns:
                randomized_lfn = lfn + (
                    '.%s.%d.%s' %
                    (job.backend.site, int(time.time()),
                     commands.getoutput('uuidgen 2> /dev/null')[:4]))
            else:
                randomized_lfn = lfn
            ofspec.lfn = randomized_lfn
            randomized_lfns.append(randomized_lfn)
            ofspec.destinationDBlock = jspec.destinationDBlock
            ofspec.destinationSE = jspec.destinationSE
            ofspec.dataset = jspec.destinationDBlock
            ofspec.type = 'output'
            jspec.addFile(ofspec)
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --output%sFile %s' % (
                    lfntype, randomized_lfns[ilfn])
            else:
                jspec.jobParameters += ' output%sFile=%s' % (
                    lfntype, randomized_lfns[ilfn])
            ilfn = ilfn + 1

        # Input files.
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(
                    job.inputdata.guids, job.inputdata.names,
                    job.inputdata.sizes, job.inputdata.checksums,
                    job.inputdata.scopes):
                ifspec = FileSpec()
                ifspec.lfn = lfn
                ifspec.GUID = guid
                ifspec.fsize = size
                ifspec.md5sum = checksum
                ifspec.scope = scope
                ifspec.dataset = jspec.prodDBlock
                ifspec.prodDBlock = jspec.prodDBlock
                ifspec.type = 'input'
                jspec.addFile(ifspec)
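            # NOTE: 'm' is the dataset-name regex match from above; this fallback
            # assumes the regex matched (otherwise m is None and .group() fails)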
            if app.input_type:
                itype = app.input_type
            else:
                itype = m.group(5)
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(
                    job.inputdata.names))
            else:
                jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(
                    job.inputdata.names))

        # Log files.
        lfspec = FileSpec()
        lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
        lfspec.destinationDBlock = jspec.destinationDBlock
        lfspec.destinationSE = jspec.destinationSE
        lfspec.dataset = jspec.destinationDBlock
        lfspec.type = 'log'
        jspec.addFile(lfspec)

        return jspec
Ejemplo n.º 24
0
    def master_prepare(self,app,appmasterconfig):

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler master_prepare called for %s', job.getFQID('.'))
        usertag = configDQ2['usertag']
        #usertag='user09'
        nickname = getNickname(allowMissingNickname=True)
        self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (usertag,nickname,commands.getoutput('hostname').split('.')[0],int(time.time()),job.id)
#        self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity())
        sources = 'sources.%s.tar.gz' % commands.getoutput('uuidgen 2> /dev/null') 
        self.library = '%s.lib.tgz' % self.libDataset

        # check DBRelease
        # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1:
         #   raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'")

#       unpack library
        logger.debug('Creating source tarball ...')        
        tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null')
        os.mkdir(tmpdir)

        inputbox=[]
        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [ File(app.transform_archive) ]
        if app.evgen_job_option:
            self.evgen_job_option=app.evgen_job_option
            if os.path.exists(app.evgen_job_option):
                # locally modified job option file to add to the input sand box
                inputbox += [ File(app.evgen_job_option) ]
                self.evgen_job_option=app.evgen_job_option.split("/")[-1]

         
#       add input sandbox files
        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [ file ]
#        add option files
        for extFile in job.backend.extOutFile:
            try:
                shutil.copy(extFile,tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(extFile,tmpdir)
#       fill the archive
        for opt_file in inputbox:
            try:
                shutil.copy(opt_file.name,tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(opt_file.name,tmpdir)
#       now tar it up again

        inpw = job.getInputWorkspace()
        rc, output = commands.getstatusoutput('tar czf %s -C %s .' % (inpw.getPath(sources),tmpdir))
        if rc:
            logger.error('Packing sources failed with status %d',rc)
            logger.error(output)
            raise ApplicationConfigurationError(None,'Packing sources failed.')

        shutil.rmtree(tmpdir)

#       upload sources

        logger.debug('Uploading source tarball ...')
        try:
            cwd = os.getcwd()
            os.chdir(inpw.getPath())
            rc, output = Client.putFile(sources)
            if output != 'True':
                logger.error('Uploading sources %s failed. Status = %d', sources, rc)
                logger.error(output)
                raise ApplicationConfigurationError(None,'Uploading archive failed')
        finally:
            os.chdir(cwd)      


        # Use Panda's brokerage
##         if job.inputdata and len(app.sites)>0:
##             # update cloud, use inputdata's
##             from dq2.info.TiersOfATLAS import whichCloud,ToACache
##             inclouds=[]
##             for site in app.sites:
##                 cloudSite=whichCloud(app.sites[0])
##                 if cloudSite not in inclouds:
##                     inclouds.append(cloudSite)
##             # now converting inclouds content into proper brokering stuff.
##             outclouds=[]
##             for cloudSite in inclouds:
##                 for cloudID, eachCloud in ToACache.dbcloud.iteritems():
##                     if cloudSite==eachCloud:
##                         cloud=cloudID
##                         outclouds.append(cloud)
##                         break
                    
##             print outclouds
##             # finally, matching with user's wishes
##             if len(outclouds)>0:
##                 if not job.backend.requirements.cloud: # no user wish, update
##                     job.backend.requirements.cloud=outclouds[0]
##                 else:
##                     try:
##                         assert job.backend.requirements.cloud in outclouds
##                     except:
##                         raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))
                                                            
        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
        
        runPandaBrokerage(job)
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(None,'site is still AUTO after brokerage!')

        # output dataset preparation and registration
        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        except:
            raise ApplicationConfigurationError(None,"Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site)
        if not app.dryrun:
            for outtype in app.outputpaths.keys():
                dset=string.replace(app.outputpaths[outtype],"/",".")
                dset=dset[1:]
                # dataset registration must be done only once.
                print "registering output dataset %s at %s" % (dset,outDsLocation)
                try:
                    Client.addDataset(dset,False,location=outDsLocation)
                    dq2_set_dataset_lifetime(dset, location=outDsLocation)
                except:
                    raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % dset)
            # extend registration to build job lib dataset:
            print "registering output dataset %s at %s" % (self.libDataset,outDsLocation)

            try:
                Client.addDataset(self.libDataset,False,location=outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
            except:
                raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % self.libDataset)


        ###
        cacheVer = "-AtlasProduction_" + str(app.prod_release)
            
        logger.debug("master job submit?")
        self.outsite=job.backend.site
        if app.se_name and app.se_name != "none" and not self.outsite:
            self.outsite=app.se_name

       
        #       create build job
        jspec = JobSpec()
        jspec.jobDefinitionID   = job.id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        jspec.homepackage       = 'AnalysisTransforms'+cacheVer#+nightVer
        jspec.transformation    = '%s/buildJob-00-00-03' % Client.baseURLSUB # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare())
        jspec.destinationDBlock = self.libDataset
        jspec.destinationSE     = job.backend.site
        jspec.prodSourceLabel   = 'panda'
        jspec.assignedPriority  = 2000
        jspec.computingSite     = job.backend.site
        jspec.cloud             = job.backend.requirements.cloud
#        jspec.jobParameters     = self.args not known yet
        jspec.jobParameters     = '-o %s' % (self.library)
        if app.userarea:
            print app.userarea
            jspec.jobParameters     += ' -i %s' % (os.path.basename(app.userarea))
        else:
            jspec.jobParameters     += ' -i %s' % (sources)
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
        
        matchURL = re.search('(http.*://[^/]+)/',Client.baseURLSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

        fout = FileSpec()
        fout.lfn  = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)
        #print "MASTER JOB DETAILS:",jspec.jobParameters

        return jspec
Ejemplo n.º 25
0
    site = sys.argv[1]
    cloud = 'CA'
elif len(sys.argv) == 3:
    site = sys.argv[1]
    cloud = sys.argv[2]
else:
    site = None
    cloud = None

datasetName = 'panda.destDB.%s_tid999991' % commands.getoutput('uuidgen')
taskid = 999989

jobList = []

for i in range(1):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    #    job.AtlasRelease      = 'Atlas-12.0.6'
    #    job.homepackage       = 'AtlasProduction/12.0.6.5'
    job.AtlasRelease = 'Atlas-12.0.7'
    job.homepackage = 'AtlasProduction/12.0.7.1'

    job.transformation = 'csc_evgen_trf.py'
    job.destinationDBlock = datasetName
    #    job.destinationSE     = destName
    #    job.cloud             = 'CA'
    job.cloud = cloud
    job.taskID = taskid
    job.currentPriority = 1000
    job.prodSourceLabel = 'test'
Ejemplo n.º 26
0
    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.'))

#       in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter
        if job.inputdata and not job._getRoot().subjobs:

            if not job.inputdata.names:
                
                contents = job.inputdata.get_contents(overlap=False, size=True)

                for ds in contents.keys():

                    for f in contents[ds]:
                        job.inputdata.guids.append( f[0] )
                        job.inputdata.names.append( f[1][0] )
                        job.inputdata.sizes.append( f[1][1] )
                        job.inputdata.checksums.append( f[1][2] )
                        job.inputdata.scopes.append( f[1][3] )


        site = job._getRoot().backend.site
        job.backend.site = site
        job.backend.actualCE = site
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

#       if no outputdata are given
        if not job.outputdata:
            job.outputdata = DQ2OutputDataset()
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname
        #if not job.outputdata.datasetname:
        else:
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname

        if not job.outputdata.datasetname:
            raise ApplicationConfigurationError('DQ2OutputDataset has no datasetname')

        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
        jspec.transformation    = '%s/runGen-00-00-02' % Client.baseURLSUB
        if job.inputdata:
            jspec.prodDBlock    = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock    = 'NULL'
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            if not job._getRoot().subjobs or job.id == 0:
                logger.warning('You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.')
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = site
        jspec.prodSourceLabel   = configPanda['prodSourceLabelRun']
        jspec.processingType    = configPanda['processingType']
        jspec.assignedPriority  = configPanda['assignedPriorityRun']
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        # cputime     
        if job.backend.requirements.cputime != -1:
            jspec.maxCpuCount = job.backend.requirements.cputime
        jspec.computingSite     = site

#       library (source files)
        if job.backend.libds:
            flib = FileSpec()
            flib.lfn            = self.fileBO.lfn
            flib.GUID           = self.fileBO.GUID
            flib.type           = 'input'
            flib.status         = self.fileBO.status
            flib.dataset        = self.fileBO.destinationDBlock
            flib.dispatchDBlock = self.fileBO.destinationDBlock
            jspec.addFile(flib)
        elif job.backend.bexec:
            flib = FileSpec()
            flib.lfn            = self.library
            flib.type           = 'input'
            flib.dataset        = self.libDataset
            flib.dispatchDBlock = self.libDataset
            jspec.addFile(flib)

#       input files FIXME: many more input types
        if job.inputdata:            
            for guid, lfn, size, checksum, scope in zip(job.inputdata.guids,job.inputdata.names,job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
                finp = FileSpec()
                finp.lfn            = lfn
                finp.GUID           = guid
                finp.scope          = scope
                
#            finp.fsize =
#            finp.md5sum =
                finp.dataset        = job.inputdata.dataset[0]
                finp.prodDBlock     = job.inputdata.dataset[0]
                finp.dispatchDBlock = job.inputdata.dataset[0]
                finp.type           = 'input'
                finp.status         = 'ready'
                jspec.addFile(finp)

#       output files
#        outMap = {}
        
        #FIXME: if options.outMeta != []:
        self.rundirectory = "."

#       log files

        flog = FileSpec()
        flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
        flog.type = 'log'
        flog.dataset           = job.outputdata.datasetname
        flog.destinationDBlock = job.outputdata.datasetname
        flog.destinationSE     = job.backend.site
        jspec.addFile(flog)

#       job parameters
        param = ''

        # source URL
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL)
        srcURL = ""
        if matchURL != None:
            srcURL = matchURL.group(1)
            param += " --sourceURL %s " % srcURL

        param += '-r "%s" ' % self.rundirectory

        exe_name = job.application.exe
        if job.backend.bexec == '':
            if hasattr(job.application.exe, "name"):
                exe_name = os.path.basename(job.application.exe.name)

            # set jobO parameter
            if job.application.args:
                param += ' -j "" -p "%s %s" '%(exe_name,urllib.quote(" ".join(job.application.args)))
            else:
                param += ' -j "" -p "%s" '%exe_name
            if self.inputsandbox:
                param += ' -a %s '%self.inputsandbox

        else:
            param += '-l %s ' % self.library
            param += '-j "" -p "%s %s" ' % ( exe_name,urllib.quote(" ".join(job.application.args)))

        if job.inputdata:
            param += '-i "%s" ' % job.inputdata.names

        # fill outfiles
        outfiles = {}
        tarnum = 1
        for f in self.extOutFile:
            if f.find('*') != -1:
                # archive * : wildcard outputs are tarred into a numbered outputbox
                outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))
                tarnum += 1
            else:
                outfiles[f] = "%s.%s.%s" % (f, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))

            fout = FileSpec()
            fout.lfn = outfiles[f]
            fout.type = 'output'
            fout.dataset           = job.outputdata.datasetname
            fout.destinationDBlock = job.outputdata.datasetname
            fout.destinationSE     = job.backend.site
            jspec.addFile(fout)

        param += '-o "%s" ' % (outfiles) # must be double quotes, because python prints strings in 'single quotes' 

        for file in jspec.Files:
            if file.type in [ 'output', 'log'] and configPanda['chirpconfig']:
                file.dispatchDBlockToken = configPanda['chirpconfig']
                logger.debug('chirp file %s',file)

        jspec.jobParameters = param
        
        return jspec
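
The spec returned by prepare() is eventually handed to the PanDA client for submission. A minimal sketch, assuming the pandatools Client module imported above and a submitJobs call that follows the pattern of the test scripts further down this page (a status code plus one (PandaID, ...) tuple per job); names here are illustrative:

# Submission sketch (illustrative, not part of the original handler)
from pandatools import Client

s, o = Client.submitJobs([jspec])
print s
for x in o:
    print "PandaID=%s" % x[0]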
Ejemplo n.º 27
0
# (reconstructed guard: the snippet starts mid-condition; the test plausibly
#  requires all of the pipeline variables used in the job name below)
if prodUserName is not None and PIPELINE_TASK is not None \
    and PIPELINE_EXECUTIONNUMBER is not None and PIPELINE_PROCESSINSTANCE is not None \
    and PIPELINE_STREAM is not None:
    jobName = 'job.%(PIPELINE_PROCESSINSTANCE)s.%(PIPELINE_TASK)s.%(PIPELINE_EXECUTIONNUMBER)s.%(prodUserName)s.%(PIPELINE_STREAM)s' % \
    {'prodUserName': str(prodUserName), \
     'PIPELINE_TASK': str(PIPELINE_TASK), \
     'PIPELINE_EXECUTIONNUMBER': str(PIPELINE_EXECUTIONNUMBER), \
     'PIPELINE_STREAM': str(PIPELINE_STREAM), \
     'PIPELINE_PROCESSINSTANCE': str(PIPELINE_PROCESSINSTANCE) \
     }
else:
    jobName = "%s" % commands.getoutput('uuidgen')

if PIPELINE_STREAM is not None:
    jobDefinitionID = PIPELINE_STREAM
else:
    jobDefinitionID = int(time.time()) % 10000
job = JobSpec()
job.jobDefinitionID = jobDefinitionID
job.jobName = jobName
job.transformation    = 'http://pandawms.org/pandawms-jobcache/lsst-trf.sh'
job.destinationDBlock = datasetName
job.destinationSE     = 'local' 
job.currentPriority   = 1000
job.prodSourceLabel = 'panda'
job.jobParameters = ' --lsstJobParams="%s" ' % lsstJobParams
if prodUserName is not None:
    job.prodUserName = prodUserName
else:
    job.prodUserName = prodUserNameDefault
if PIPELINE_PROCESSINSTANCE is not None:
    job.taskID = PIPELINE_PROCESSINSTANCE
if PIPELINE_EXECUTIONNUMBER is not None:
Ejemplo n.º 28
0
    step = m.group(2)
    if format=='HITS':
        step = 'simul'
    # append
    oDatasets.append('%s.%s.%s.%s_tid%06d' % (m.group(1),step,format,m.group(3),int(taskID)))

# log dataset
lDataset = '%s.%s.%s.%s_tid%06d' % (m.group(1),m.group(2),'log',m.group(3),int(taskID))


# instantiate JobSpecs
iJob = 0
jobList = []
for line in taskFile:
    iJob += 1
    job = JobSpec()
    # job ID  ###### FIXME
    job.jobDefinitionID   = int(time.time()) % 10000
    # job name
    job.jobName           = "%s_%05d.job" % (taskName,iJob)
    # AtlasRelease
    if len(re.findall(r'\.', trfVer)) > 2:
        match = re.search(r'^(\d+\.\d+\.\d+)', trfVer)
        job.AtlasRelease  = 'Atlas-%s' % match.group(1)
    else:
        job.AtlasRelease  = 'Atlas-%s' % trfVer
    # homepackage
    vers = trfVer.split('.')
    if int(vers[0]) <= 11:
        job.homepackage = 'JobTransforms'
        for ver in vers:
Ejemplo n.º 29
0
    def defineEvgen16Job(self, i):
        """Define an Evgen16 job based on predefined values and randomly generated names
        """

        job = JobSpec()
        job.computingSite = self.__site
        job.cloud = self.__cloud

        job.jobDefinitionID = int(time.time()) % 10000
        job.jobName = "%s_%d" % (uuid.uuid1(), i)
        job.AtlasRelease = 'Atlas-16.6.2'
        job.homepackage = 'AtlasProduction/16.6.2.1'
        job.transformation = 'Evgen_trf.py'
        job.destinationDBlock = self.__datasetName
        job.destinationSE = self.__destName
        job.currentPriority = 10000
        job.prodSourceLabel = 'test'
        job.cmtConfig = 'i686-slc5-gcc43-opt'

        #Output file
        fileO = FileSpec()
        fileO.lfn = "%s.evgen.pool.root" % job.jobName
        fileO.destinationDBlock = job.destinationDBlock
        fileO.destinationSE = job.destinationSE
        fileO.dataset = job.destinationDBlock
        fileO.destinationDBlockToken = 'ATLASDATADISK'
        fileO.type = 'output'
        job.addFile(fileO)

        #Log file
        fileL = FileSpec()
        fileL.lfn = "%s.job.log.tgz" % job.jobName
        fileL.destinationDBlock = job.destinationDBlock
        fileL.destinationSE = job.destinationSE
        fileL.dataset = job.destinationDBlock
        fileL.destinationDBlockToken = 'ATLASDATADISK'
        fileL.type = 'log'
        job.addFile(fileL)

        job.jobParameters = "2760 105048 19901 101 200 MC10.105048.PythiaB_ccmu3mu1X.py %s NONE NONE NONE MC10JobOpts-latest-test.tar.gz" % fileO.lfn
        return job
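
A minimal driver for defineEvgen16Job, assuming userinterface.Client as imported in the other test scripts on this page; 'tester' stands in for whatever object provides the method and is hypothetical:

# Illustrative driver; Client.submitJobs follows the pattern of the other
# test scripts here, 'tester' is a placeholder instance.
import userinterface.Client as Client

jobList = [tester.defineEvgen16Job(i) for i in range(3)]
s, o = Client.submitJobs(jobList)
print "submit status: %s" % s
for x in o:
    print "PandaID=%s" % x[0]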
Ejemplo n.º 31
0
import sys
import time
import commands

from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'BNL_SE'

jobListE = []
lfnListE = []

for i in range(2):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = commands.getoutput('uuidgen')
    job.AtlasRelease = 'Atlas-11.0.3'
    job.homepackage = 'JobTransforms-11-00-03-03'
    job.transformation = 'share/csc.evgen.trf'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 1000
    job.prodSourceLabel = 'test'
    job.computingSite = site

    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % commands.getoutput('uuidgen')
    lfnListE.append(file.lfn)
    file.lfn += ('.%d' % (i + 1))
Ejemplo n.º 32
0
def send_job(jobid, siteid):
    _logger.debug('Jobid: ' + str(jobid))

    site = sites_.get(siteid)

    job = jobs_.get(int(jobid))
    cont = job.container
    files_catalog = cont.files

    fscope = getScope(job.owner.username)
    datasetName = '{}:{}'.format(fscope, cont.guid)

    distributive = job.distr.name
    release = job.distr.release

    # Prepare runScript
    parameters = job.distr.command
    parameters = parameters.replace("$COMMAND$", job.params)
    parameters = parameters.replace("$USERNAME$", job.owner.username)
    parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group)

    # Prepare metadata
    metadata = dict(user=job.owner.username)

    # Prepare PanDA Object
    pandajob = JobSpec()
    pandajob.jobDefinitionID = int(time.time()) % 10000
    pandajob.jobName = cont.guid
    pandajob.transformation = client_config.DEFAULT_TRF
    pandajob.destinationDBlock = datasetName
    pandajob.destinationSE = site.se
    pandajob.currentPriority = 1000
    pandajob.prodSourceLabel = 'user'
    pandajob.computingSite = site.ce
    pandajob.cloud = 'RU'
    pandajob.VO = 'atlas'
    pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName)
    pandajob.coreCount = job.corecount
    pandajob.metadata = json.dumps(metadata)
    #pandajob.workingGroup = job.owner.working_group

    if site.encode_commands:
        # It requires script wrapper on cluster side
        pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release,
                                                    distributive, parameters)
    else:
        pandajob.jobParameters = parameters

    has_input = False
    for fcc in files_catalog:
        if fcc.type == 'input':
            f = fcc.file
            guid = f.guid
            fileIT = FileSpec()
            fileIT.lfn = f.lfn
            fileIT.dataset = pandajob.prodDBlock
            fileIT.prodDBlock = pandajob.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = fscope
            fileIT.status = 'ready'
            fileIT.GUID = guid
            pandajob.addFile(fileIT)

            has_input = True
        if fcc.type == 'output':
            f = fcc.file
            fileOT = FileSpec()
            fileOT.lfn = f.lfn
            fileOT.destinationDBlock = pandajob.prodDBlock
            fileOT.destinationSE = pandajob.destinationSE
            fileOT.dataset = pandajob.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = fscope
            fileOT.GUID = f.guid
            pandajob.addFile(fileOT)

            # Save replica meta
            fc.new_replica(f, site)

    if not has_input:
        # Add fake input
        fileIT = FileSpec()
        fileIT.lfn = "fake.input"
        fileIT.dataset = pandajob.prodDBlock
        fileIT.prodDBlock = pandajob.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = fscope
        fileIT.status = 'ready'
        fileIT.GUID = "fake.guid"
        pandajob.addFile(fileIT)

    # Prepare log file
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % pandajob.jobName
    fileOL.destinationDBlock = pandajob.destinationDBlock
    fileOL.destinationSE = pandajob.destinationSE
    fileOL.dataset = '{}:logs'.format(fscope)
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    pandajob.addFile(fileOL)

    # Save log meta
    log = File()
    log.scope = fscope
    log.lfn = fileOL.lfn
    log.guid = getGUID(log.scope, log.lfn)
    log.type = 'log'
    log.status = 'defined'
    files_.save(log)

    # Save replica meta
    fc.new_replica(log, site)

    # Register file in container
    fc.reg_file_in_cont(log, cont, 'log')

    # Submit job
    o = submitJobs([pandajob])
    x = o[0]

    try:
        #update PandaID
        PandaID = int(x[0])
        job.pandaid = PandaID
        job.ce = site.ce
    except:
        job.status = 'submit_error'
    jobs_.save(job)

    return 0
Ejemplo n.º 33
0
    def run(self):
        try:
            while True:
                _logger.debug('%s start' % self.pandaID)
                # query job
                job = self.taskBuffer.peekJobs([self.pandaID],
                                               fromDefined=False,
                                               fromArchived=False,
                                               fromWaiting=False)[0]
                # check job status (check for None before dereferencing)
                if job is None:
                    _logger.debug('%s escape : not found' % self.pandaID)
                    return
                _logger.debug('%s in %s' % (self.pandaID, job.jobStatus))
                if job.jobStatus not in [
                        'running', 'sent', 'starting', 'holding', 'stagein',
                        'stageout'
                ]:
                    if job.jobStatus == 'transferring' and (
                            job.prodSourceLabel in ['user', 'panda']
                            or job.jobSubStatus not in [None, 'NULL', '']):
                        pass
                    else:
                        _logger.debug('%s escape : %s' %
                                      (self.pandaID, job.jobStatus))
                        return
                # time limit
                timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
                    minutes=self.sleepTime)
                if job.modificationTime < timeLimit or (
                        job.endTime != 'NULL' and job.endTime < timeLimit):
                    _logger.debug(
                        '%s %s lastmod:%s endtime:%s' %
                        (job.PandaID, job.jobStatus, str(
                            job.modificationTime), str(job.endTime)))
                    destDBList = []
                    if job.jobStatus == 'sent':
                        # sent job didn't receive reply from pilot within 30 min
                        job.jobDispatcherErrorCode = ErrorCode.EC_SendError
                        job.jobDispatcherErrorDiag = "Sent job didn't receive reply from pilot within 30 min"
                    elif job.exeErrorDiag == 'NULL' and job.pilotErrorDiag == 'NULL':
                        # lost heartbeat
                        job.jobDispatcherErrorCode = ErrorCode.EC_Watcher
                        if job.jobDispatcherErrorDiag == 'NULL':
                            if job.endTime == 'NULL':
                                # normal lost heartbeat
                                job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(
                                    job.modificationTime)
                            else:
                                # job recovery failed
                                job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(
                                    job.endTime)
                                if job.jobStatus == 'transferring':
                                    job.jobDispatcherErrorDiag += ' in transferring'
                            # get worker
                            workerSpecs = self.taskBuffer.getWorkersForJob(
                                job.PandaID)
                            if len(workerSpecs) > 0:
                                workerSpec = workerSpecs[0]
                                if workerSpec.status in [
                                        'finished', 'failed', 'cancelled',
                                        'missed'
                                ]:
                                    job.supErrorCode = SupErrors.error_codes[
                                        'WORKER_ALREADY_DONE']
                                    job.supErrorDiag = 'worker already {0} at {1} with {2}'.format(
                                        workerSpec.status,
                                        str(workerSpec.endTime),
                                        workerSpec.diagMessage)
                                    job.supErrorDiag = JobSpec.truncateStringAttr(
                                        'supErrorDiag', job.supErrorDiag)
                    else:
                        # job recovery failed
                        job.jobDispatcherErrorCode = ErrorCode.EC_Recovery
                        job.jobDispatcherErrorDiag = 'job recovery failed for %s hours' % (
                            self.sleepTime / 60)
                    # set job status
                    job.jobStatus = 'failed'
                    # set endTime for lost heartbeat
                    if job.endTime == 'NULL':
                        # normal lost heartbeat
                        job.endTime = job.modificationTime
                    # set files status
                    for file in job.Files:
                        if file.type == 'output' or file.type == 'log':
                            file.status = 'failed'
                            if not file.destinationDBlock in destDBList:
                                destDBList.append(file.destinationDBlock)
                    # event service
                    if EventServiceUtils.isEventServiceJob(
                            job
                    ) and not EventServiceUtils.isJobCloningJob(job):
                        eventStat = self.taskBuffer.getEventStat(
                            job.jediTaskID, job.PandaID)
                        # set sub status when no successful events
                        if EventServiceUtils.ST_finished not in eventStat:
                            job.jobSubStatus = 'es_heartbeat'
                    # update job
                    self.taskBuffer.updateJobs([job], False)
                    # start closer
                    if job.jobStatus == 'failed':

                        source = 'jobDispatcherErrorCode'
                        error_code = job.jobDispatcherErrorCode
                        error_diag = job.jobDispatcherErrorDiag

                        try:
                            _logger.debug(
                                "Watcher will call apply_retrial_rules")
                            retryModule.apply_retrial_rules(
                                self.taskBuffer, job.PandaID, source,
                                error_code, error_diag, job.attemptNr)
                            _logger.debug("apply_retrial_rules is back")
                        except Exception as e:
                            _logger.debug(
                                "apply_retrial_rules excepted and needs to be investigated (%s): %s"
                                % (e, traceback.format_exc()))

                        # updateJobs was successful and it failed a job with taskBufferErrorCode
                        try:

                            _logger.debug("Watcher.run will peek the job")
                            job_tmp = self.taskBuffer.peekJobs(
                                [job.PandaID],
                                fromDefined=False,
                                fromArchived=True,
                                fromWaiting=False)[0]
                            if job_tmp.taskBufferErrorCode:
                                source = 'taskBufferErrorCode'
                                error_code = job_tmp.taskBufferErrorCode
                                error_diag = job_tmp.taskBufferErrorDiag
                                _logger.debug(
                                    "Watcher.run 2 will call apply_retrial_rules"
                                )
                                retryModule.apply_retrial_rules(
                                    self.taskBuffer, job_tmp.PandaID, source,
                                    error_code, error_diag, job_tmp.attemptNr)
                                _logger.debug("apply_retrial_rules 2 is back")
                        except IndexError:
                            pass
                        except Exception as e:
                            _logger.error(
                                "apply_retrial_rules 2 excepted and needs to be investigated (%s): %s"
                                % (e, traceback.format_exc()))

                        cThr = Closer(self.taskBuffer, destDBList, job)
                        cThr.start()
                        cThr.join()
                    _logger.debug('%s end' % job.PandaID)
                    return
                # single action
                if self.single:
                    return
                # sleep
                time.sleep(60 * self.sleepTime)
        except:
            type, value, traceBack = sys.exc_info()
            _logger.error("run() : %s %s" % (type, value))
            return
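
The log messages above identify this run() as belonging to a Watcher thread. A launch sketch; the constructor arguments are inferred from the attributes run() reads (taskBuffer, pandaID, single, sleepTime), so the exact signature is an assumption:

# Hypothetical launch; the signature is inferred from the attributes used in run().
watcher = Watcher(taskBuffer, pandaID, single=True, sleepTime=360)
watcher.start()
watcher.join()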
Ejemplo n.º 34
0
 def run(self):
     try:
         # get job
         tmpJobs = self.taskBuffer.getFullJobStatus([self.rPandaID])
         if tmpJobs == [] or tmpJobs[0] == None:
             _logger.debug("cannot find job for PandaID=%s" % self.rPandaID)
             return
         self.job = tmpJobs[0]
         _logger.debug("%s start %s:%s:%s" % (self.token,self.job.jobDefinitionID,self.job.prodUserName,self.job.computingSite))
         # using output container
         if not self.job.destinationDBlock.endswith('/'):
             _logger.debug("%s output dataset container is required" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # FIXME : don't touch group jobs for now
         if self.job.destinationDBlock.startswith('group') and (not self.userRequest):
             _logger.debug("%s skip group jobs" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check processingType
         typesForRebro = ['pathena','prun','ganga','ganga-rbtest']
         if not self.job.processingType in typesForRebro:
             _logger.debug("%s skip processingType=%s not in %s" % \
                           (self.token,self.job.processingType,str(typesForRebro)))
             _logger.debug("%s end" % self.token)
             return
         # check jobsetID
         if self.job.jobsetID in [0,'NULL',None]:
             _logger.debug("%s jobsetID is undefined" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check metadata 
         if self.job.metadata in [None,'NULL']:
             _logger.debug("%s metadata is unavailable" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --disableRebrokerage
         match = re.search("--disableRebrokerage",self.job.metadata)
         if match != None and (not self.simulation) and (not self.forceOpt) \
                and (not self.userRequest):
             _logger.debug("%s disabled rebrokerage" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --site
         match = re.search("--site",self.job.metadata)
         if match != None and (not self.simulation) and (not self.forceOpt) \
                and (not self.userRequest):
             _logger.debug("%s --site is used" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --libDS
         match = re.search("--libDS",self.job.metadata)
         if match != None:
             _logger.debug("%s --libDS is used" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --workingGroup since it is site-specific 
         match = re.search("--workingGroup",self.job.metadata)
         if match != None:
             _logger.debug("%s workingGroup is specified" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # avoid too many rebrokerage
         if not self.checkRev():
             _logger.debug("%s avoid too many rebrokerage" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check if multiple JobIDs use the same libDS
         if self.bPandaID != None and self.buildStatus not in ['finished','failed']:
             if self.minPandaIDlibDS == None or self.maxPandaIDlibDS == None:
                 _logger.debug("%s max/min PandaIDs are unavailable for the libDS" % self.token)
                 _logger.debug("%s end" % self.token)
                 return
             tmpPandaIDsForLibDS = self.taskBuffer.getFullJobStatus([self.minPandaIDlibDS,self.maxPandaIDlibDS])
             if len(tmpPandaIDsForLibDS) != 2 or tmpPandaIDsForLibDS[0] == None or tmpPandaIDsForLibDS[1] == None:
                 _logger.debug("%s failed to get max/min PandaIDs for the libDS" % self.token)
                 _logger.debug("%s end" % self.token)
                 return
             # check
             if tmpPandaIDsForLibDS[0].jobDefinitionID != tmpPandaIDsForLibDS[1].jobDefinitionID:
                 _logger.debug("%s multiple JobIDs use the libDS %s:%s %s:%s" % (self.token,tmpPandaIDsForLibDS[0].jobDefinitionID,
                                                                                 self.minPandaIDlibDS,tmpPandaIDsForLibDS[1].jobDefinitionID,
                                                                                 self.maxPandaIDlibDS))
                 _logger.debug("%s end" % self.token)
                 return
         # check excludedSite
         if self.excludedSite == None:
             self.excludedSite = []
             match = re.search("--excludedSite( +|=)\s*(\'|\")*([^ \"\';$]+)",self.job.metadata)
             if match != None:
                 self.excludedSite = match.group(3).split(',')
         # remove empty
         try:
             self.excludedSite.remove('')
         except:
             pass
         _logger.debug("%s excludedSite=%s" % (self.token,str(self.excludedSite)))
         # check cloud
         if self.cloud == None:
             match = re.search("--cloud( +|=)\s*(\'|\")*([^ \"\';$]+)",self.job.metadata)
             if match != None:
                 self.cloud = match.group(3)
         _logger.debug("%s cloud=%s" % (self.token,self.cloud))
         # get inDS/LFNs
         status,tmpMapInDS,maxFileSize = self.taskBuffer.getInDatasetsForReBrokerage(self.jobID,self.userName)
         if not status:
             # failed
             _logger.error("%s failed to get inDS/LFN from DB" % self.token)
             return
         status,inputDS = self.getListDatasetsUsedByJob(tmpMapInDS)
         if not status:
             # failed
             _logger.error("%s failed" % self.token)
             return 
         # get replicas
         replicaMap = {}
         unknownSites = {} 
         for tmpDS in inputDS:
             if tmpDS.endswith('/'):
                 # container
                 status,tmpRepMaps = self.getListDatasetReplicasInContainer(tmpDS)
             else:
                 # normal dataset
                 status,tmpRepMap = self.getListDatasetReplicas(tmpDS)
                 tmpRepMaps = {tmpDS:tmpRepMap}
             if not status:
                 # failed
                 _logger.debug("%s failed" % self.token)
                 return 
             # make map per site
             for tmpDS,tmpRepMap in tmpRepMaps.iteritems():
                 for tmpSite,tmpStat in tmpRepMap.iteritems():
                     # ignore special sites
                     if tmpSite in ['CERN-PROD_TZERO','CERN-PROD_DAQ','CERN-PROD_TMPDISK']:
                         continue
                     # ignore tape sites
                     if tmpSite.endswith('TAPE'):
                         continue
                     # keep sites with unknown replica info 
                     if tmpStat[-1]['found'] == None:
                         if not unknownSites.has_key(tmpDS):
                             unknownSites[tmpDS] = []
                         unknownSites[tmpDS].append(tmpSite)
                     # ignore ToBeDeleted
                     if tmpStat[-1]['archived'] in ['ToBeDeleted',]:
                         continue
                     # change EOS
                     if tmpSite.startswith('CERN-PROD_EOS'):
                         tmpSite = 'CERN-PROD_EOS'
                     # change EOS TMP
                     if tmpSite.startswith('CERN-PROD_TMP'):
                         tmpSite = 'CERN-PROD_TMP'
                      # strip the storage-token suffix (e.g. _DATADISK)
                      tmpSite = re.sub('_[^_-]+DISK$','',tmpSite)
                      # strip _PERF-XYZ suffixes
                      tmpSite = re.sub('_PERF-[^_-]+$','',tmpSite)
                      # strip _PHYS-XYZ suffixes
                      tmpSite = re.sub('_PHYS-[^_-]+$','',tmpSite)
                     # patch for BNLPANDA
                     if tmpSite in ['BNLPANDA']:
                         tmpSite = 'BNL-OSG2'
                     # add to map    
                     if not replicaMap.has_key(tmpSite):
                         replicaMap[tmpSite] = {}
                     replicaMap[tmpSite][tmpDS] = tmpStat[-1]
         _logger.debug("%s replica map -> %s" % (self.token,str(replicaMap)))
         # refresh replica info if needed
         self.refreshReplicaInfo(unknownSites)
         # instantiate SiteMapper
         siteMapper = SiteMapper(self.taskBuffer)
         # get original DDM
         origSiteDDM = self.getAggName(siteMapper.getSite(self.job.computingSite).ddm)
         # check all datasets
         maxDQ2Sites = []
         if inputDS != []:
             # loop over all sites
             for tmpSite,tmpDsVal in replicaMap.iteritems():
                 # loop over all datasets
                 appendFlag = True
                 for tmpOrigDS in inputDS:
                     # check completeness
                     if tmpDsVal.has_key(tmpOrigDS) and tmpDsVal[tmpOrigDS]['found'] != None and \
                            tmpDsVal[tmpOrigDS]['total'] == tmpDsVal[tmpOrigDS]['found']:
                         pass
                     else:
                         appendFlag = False
                 # append
                 if appendFlag:
                     if not tmpSite in maxDQ2Sites:
                         maxDQ2Sites.append(tmpSite)
         _logger.debug("%s candidate DQ2s -> %s" % (self.token,str(maxDQ2Sites)))
         if inputDS != [] and maxDQ2Sites == []:
             _logger.debug("%s no DQ2 candidate" % self.token)
         else:
             maxPandaSites = []
             # original maxinputsize
             origMaxInputSize = siteMapper.getSite(self.job.computingSite).maxinputsize
             # look for Panda siteIDs
             for tmpSiteID,tmpSiteSpec in siteMapper.siteSpecList.iteritems():
                 # use ANALY_ only
                 if not tmpSiteID.startswith('ANALY_'):
                     continue
                 # remove test and local
                 if re.search('_test',tmpSiteID,re.I) != None:
                     continue
                 if re.search('_local',tmpSiteID,re.I) != None:
                     continue
                 # avoid same site
                 if self.avoidSameSite and self.getAggName(tmpSiteSpec.ddm) == origSiteDDM:
                     continue
                 # check DQ2 ID
                 if self.cloud in [None,tmpSiteSpec.cloud] \
                        and (self.getAggName(tmpSiteSpec.ddm) in maxDQ2Sites or inputDS == []):
                     # excluded sites
                     excludedFlag = False
                     for tmpExcSite in self.excludedSite:
                         if re.search(tmpExcSite,tmpSiteID) != None:
                             excludedFlag = True
                             break
                     if excludedFlag:
                         _logger.debug("%s skip %s since excluded" % (self.token,tmpSiteID))
                         continue
                     # use online only
                     if tmpSiteSpec.status != 'online':
                         _logger.debug("%s skip %s status=%s" % (self.token,tmpSiteID,tmpSiteSpec.status))
                         continue
                     # check maxinputsize
                     if (maxFileSize == None and origMaxInputSize > siteMapper.getSite(tmpSiteID).maxinputsize) or \
                            maxFileSize > siteMapper.getSite(tmpSiteID).maxinputsize:
                         _logger.debug("%s skip %s due to maxinputsize" % (self.token,tmpSiteID))
                         continue
                     # append
                     if not tmpSiteID in maxPandaSites:
                         maxPandaSites.append(tmpSiteID)
             # choose at most 20 sites randomly to avoid too many lookups
             random.shuffle(maxPandaSites)
             maxPandaSites = maxPandaSites[:20]
             _logger.debug("%s candidate PandaSites -> %s" % (self.token,str(maxPandaSites)))
             # no Panda siteIDs            
             if maxPandaSites == []:            
                 _logger.debug("%s no Panda site candidate" % self.token)
             else:
                 # set AtlasRelease and cmtConfig to dummy job
                 tmpJobForBrokerage = JobSpec()
                 if self.job.AtlasRelease in ['NULL',None]:
                     tmpJobForBrokerage.AtlasRelease = ''
                 else:
                     tmpJobForBrokerage.AtlasRelease = self.job.AtlasRelease
                 # use nightlies
                 matchNight = re.search('^AnalysisTransforms-.*_(rel_\d+)$',self.job.homepackage)
                 if matchNight != None:
                     tmpJobForBrokerage.AtlasRelease += ':%s' % matchNight.group(1)
                 # use cache
                 else:
                     matchCache = re.search('^AnalysisTransforms-([^/]+)',self.job.homepackage)
                     if matchCache != None:
                         tmpJobForBrokerage.AtlasRelease = matchCache.group(1).replace('_','-')
                 if not self.job.cmtConfig in ['NULL',None]:    
                     tmpJobForBrokerage.cmtConfig = self.job.cmtConfig
                 # memory size
                 if not self.job.minRamCount in ['NULL',None,0]:
                     tmpJobForBrokerage.minRamCount = self.job.minRamCount
                 # CPU count
                 if not self.job.maxCpuCount in ['NULL',None,0]:
                     tmpJobForBrokerage.maxCpuCount = self.job.maxCpuCount
                 # run brokerage
                 brokerage.broker.schedule([tmpJobForBrokerage],self.taskBuffer,siteMapper,forAnalysis=True,
                                           setScanSiteList=maxPandaSites,trustIS=True,reportLog=True)
                 newSiteID = tmpJobForBrokerage.computingSite
                 self.brokerageInfo += tmpJobForBrokerage.brokerageErrorDiag
                 _logger.debug("%s runBrokerage -> %s" % (self.token,newSiteID))
                 # unknown site
                 if not siteMapper.checkSite(newSiteID):
                     _logger.error("%s unknown site" % self.token)
                     _logger.debug("%s failed" % self.token)
                     return 
                 # get new site spec
                 newSiteSpec = siteMapper.getSite(newSiteID)
                 # avoid repetition
                 if self.getAggName(newSiteSpec.ddm) == origSiteDDM:
                     _logger.debug("%s assigned to the same site %s " % (self.token,newSiteID))
                     _logger.debug("%s end" % self.token)                        
                     return
                 # simulation mode
                 if self.simulation:
                     _logger.debug("%s end simulation" % self.token)                        
                     return
                 # prepare jobs
                 status = self.prepareJob(newSiteID,newSiteSpec)
                 if status:
                     # run SetUpper
                     statusSetUp = self.runSetUpper()
                     if not statusSetUp:
                         _logger.debug("%s runSetUpper failed" % self.token)
                     else:
                         _logger.debug("%s successfully assigned to %s" % (self.token,newSiteID))
         _logger.debug("%s end" % self.token)
     except:
         errType,errValue,errTraceBack = sys.exc_info()
         _logger.error("%s run() : %s %s" % (self.token,errType,errValue))
Ejemplo n.º 35
0
import sys
import time
import commands

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv)>1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
#destName   = 'BNL_SE'

jobList = []

for i in [999905,999906,999907]:
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),i)
    job.AtlasRelease      = 'Atlas-14.1.0'
    job.homepackage       = 'AtlasProduction/12.0.6.2'
    job.transformation    = 'csc_evgen_trf.py'
    job.destinationDBlock = datasetName
    #job.destinationSE     = destName
    job.currentPriority   = 1000
    job.prodSourceLabel   = 'managed'
    #job.prodSourceLabel   = 'test'
    #job.computingSite     = site
    job.cmtConfig         = 'i686-slc4-gcc34-opt'
    job.metadata          = 'evgen;%s;%s;%s' % (str({'FR': 46, 'NL': 45, 'NDGF': 300, 'CERN': 19, 'TW': 44110, 'CA': 2922, 'DE': 9903, 'IT': 1168, 'US': 6226, 'UK': 1026, 'ES': 26619}),str({999907:100,999906:200,999905:300}),str({999905:100,999906:910,999907:500}))
    #job.metadata          = 'evgen;%s' % str({'FR': 46, 'NL': 45, 'NDGF': 300, 'CERN': 19, 'TW': 44110, 'CA': 2922, 'DE': 9903, 'IT': 1168, 'US': 6226, 'UK': 1026, 'ES': 26619})
Ejemplo n.º 37
0
# (reconstructed: the snippet starts mid-loop over the command-line arguments)
aSrvID = None
for idx, argv in enumerate(sys.argv):
    if argv == '-s':
        aSrvID = sys.argv[idx + 1]
        sys.argv = sys.argv[:idx]
        break

#site = sys.argv[1]
site = 'ANALY_BNL-LSST'  #orig
#site = 'BNL-LSST'
#site = 'SWT2_CPB-LSST'
#site = 'UTA_SWT2-LSST'
#site = 'ANALY_SWT2_CPB-LSST'

datasetName = 'panda.user.jschovan.lsst.%s' % commands.getoutput('uuidgen')
destName = None

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = "%s" % commands.getoutput('uuidgen')
### job.transformation    = 'http://www.usatlas.bnl.gov/~wenaus/lsst-trf/lsst-trf.sh'
#job.transformation    = 'http://pandawms.org/pandawms-jobcache/lsst-trf.sh'
job.transformation = 'http://pandawms.org/pandawms-jobcache/lsst-trf-phosim332.sh'
job.destinationDBlock = datasetName
#job.destinationSE     = destName
job.destinationSE = 'local'
job.currentPriority = 1000
#job.prodSourceLabel   = 'ptest'
#job.prodSourceLabel = 'panda'
#job.prodSourceLabel = 'ptest'
#job.prodSourceLabel = 'test'
#job.prodSourceLabel = 'ptest'
### 2014-01-27
Ejemplo n.º 38
0
    def run(self, data):
        datasetName = 'panda:panda.destDB.%s' % commands.getoutput('uuidgen')
        destName = 'ANALY_RRC-KI-HPC'
        site = 'ANALY_RRC-KI-HPC'
        scope = config['DEFAULT_SCOPE']

        distributive = data['distributive']
        release = data['release']
        parameters = data['parameters']
        input_type = data['input_type']
        input_params = data['input_params']
        input_files = data['input_files']
        output_type = data['output_type']
        output_params = data['output_params']
        output_files = data['output_files']

        jobid = data['jobid']
        _logger.debug('Jobid: ' + str(jobid))

        job = JobSpec()
        job.jobDefinitionID = int(time.time()) % 10000
        job.jobName = commands.getoutput('uuidgen')
        job.transformation = config['DEFAULT_TRF']
        job.destinationDBlock = datasetName
        job.destinationSE = destName
        job.currentPriority = 1000
        job.prodSourceLabel = 'user'
        job.computingSite = site
        job.cloud = 'RU'
        job.prodDBlock = "%s:%s.%s" % (scope, scope, job.jobName)

        job.jobParameters = '%s %s "%s"' % (release, distributive, parameters)

        params = {}
        _logger.debug('MoveData')
        ec, uploaded_input_files = movedata(
            params=params,
            fileList=input_files,
            fromType=input_type,
            fromParams=input_params,
            toType='hpc',
            toParams={'dest': '/' + re.sub(':', '/', job.prodDBlock)})
        if ec != 0:
            _logger.error('Move data error: %s' % str(ec))
            return

        for file in uploaded_input_files:
            fileIT = FileSpec()
            fileIT.lfn = file
            fileIT.dataset = job.prodDBlock
            fileIT.prodDBlock = job.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = scope
            fileIT.status = 'ready'
            fileIT.GUID = commands.getoutput('uuidgen')
            job.addFile(fileIT)

        for file in output_files:
            fileOT = FileSpec()
            fileOT.lfn = file
            fileOT.destinationDBlock = job.prodDBlock
            fileOT.destinationSE = job.destinationSE
            fileOT.dataset = job.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = scope
            fileOT.GUID = commands.getoutput('uuidgen')
            job.addFile(fileOT)

        fileOL = FileSpec()
        fileOL.lfn = "%s.log.tgz" % job.jobName
        fileOL.destinationDBlock = job.destinationDBlock
        fileOL.destinationSE = job.destinationSE
        fileOL.dataset = job.destinationDBlock
        fileOL.type = 'log'
        fileOL.scope = 'panda'
        job.addFile(fileOL)

        self.jobList.append(job)

        #submitJob
        o = self.submitJobs(self.jobList)
        x = o[0]

        #update PandaID
        conn = MySQLdb.connect(
            host=self.dbhost,
            db=self.dbname,
            #                                        port=self.dbport, connect_timeout=self.dbtimeout,
            user=self.dbuser,
            passwd=self.dbpasswd)
        cur = conn.cursor()
        try:
            varDict = {}
            varDict['pandaId'] = int(x[0])
            varDict['id'] = jobid

            # parameterized update; only the table name is interpolated directly
            sql = "UPDATE %s SET pandaId=%%(pandaId)s WHERE id=%%(id)s" % self.table_jobs
            cur.execute(sql, varDict)

        except:
            _logger.error('SENDJOB: Incorrect server response')
        try:
            conn.commit()
            return True
        except:
            _logger.error("commit error")
            return False
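
run() consumes a plain dict; the keys it expects are exactly those read at the top of the method. An example payload (the values are placeholders):

# Example 'data' payload; keys mirror those read by run(), values are placeholders.
data = {
    'jobid': 42,
    'distributive': 'myapp',
    'release': '1.0',
    'parameters': '--in input.data --out result.data',
    'input_type': 'ftp',
    'input_params': {},
    'input_files': ['input.data'],
    'output_type': 'hpc',
    'output_params': {},
    'output_files': ['result.data'],
}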
Ejemplo n.º 39
0
destName    = None

files = {
    'misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610._11615.pool.root.1':None,
    #'misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610._11639.pool.root.1':None,    
    #'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554._03634.pool.root.1':None,
    #'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554._03248.pool.root.1':None,
    #'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554._03634.pool.root.1':None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-12.0.6'
    job.homepackage       = 'AtlasProduction/12.0.6.4'
    job.transformation    = 'csc_reco_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.computingSite     = site
    #job.prodDBlock        = 'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554'
    job.prodDBlock        = 'misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610' 
    job.cloud             = 'US'   

    job.prodSourceLabel   = 'test'    
    job.currentPriority   = 10000
    job.cmtConfig         = 'i686-slc4-gcc34-opt'
Ejemplo n.º 40
0
def createJobSpec(nodes, walltime, command, jobName, outputFile=None):

    transformation = '#json#'
    datasetName = 'panda.destDB.%s' % subprocess.check_output('uuidgen').strip()
    destName = 'local'
    prodSourceLabel = 'user'
    currentPriority = 1000

    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = jobName
    job.VO = VO
    job.transformation = transformation

    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = currentPriority
    job.prodSourceLabel = prodSourceLabel
    job.computingSite = QUEUE_NAME
    job.cmtConfig = json.dumps({'name': job.jobName, 'next': None})
    lqcd_command = {
        "nodes": nodes,
        "walltime": walltime,
        "name": job.jobName,
        "command": command
    }

    if outputFile:
        lqcd_command['outputFile'] = outputFile
    job.jobParameters = json.dumps(lqcd_command)

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName.strip()
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'

    job.addFile(fileOL)

    #job.cmtConfig = None

    return job
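
A usage sketch for createJobSpec; VO and QUEUE_NAME are module-level constants in the original, and the submission call is assumed to follow the Client pattern of the other test scripts on this page:

# Usage sketch (illustrative); walltime format and the submission call are assumptions.
import userinterface.Client as Client

job = createJobSpec(nodes=1, walltime='01:00:00',
                    command='echo hello', jobName='lqcd_test',
                    outputFile='hello.out')
s, o = Client.submitJobs([job])
print "status=%s" % s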
Ejemplo n.º 41
0
import sys
import time
import commands

from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv)>1:
    site = sys.argv[1]
else:
    site = "ANALY_BNL_ATLAS_1"

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName    = 'BNL_SE'

jobDefinitionID = int(time.time()) % 10000

jobList = []

for i in range(2):
    job = JobSpec()
    job.jobDefinitionID   = jobDefinitionID
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),i)
    job.AtlasRelease      = 'Atlas-12.0.6'
    job.homepackage       = 'AnalysisTransforms'
    job.transformation    = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthenaXrd'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.currentPriority   = 3000
    job.assignedPriority  = 3000    
    job.prodSourceLabel   = 'user'
    job.computingSite     = site
    
    file = FileSpec()
    file.lfn = "%s.AANT._%05d.root" % (job.jobName,i)
    file.destinationDBlock = job.destinationDBlock
Ejemplo n.º 42
0
 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue):
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retFatal = self.SC_FATAL
     retTmpError = self.SC_FAILED
     tmpLog.debug('vo={0} label={1} queue={2}'.format(
         vo, prodSourceLabel, workQueue.queue_name))
     # loop over all tasks
     allRwMap = {}
     prioMap = {}
     tt2Map = {}
     expRWs = {}
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = 'managed'
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (
                 taskSpec.getOutDiskSize() +
                 taskSpec.getWorkDiskSize()) / 1024 / 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(
                         datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if setProdDBlock == False and prodDBlock != None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(
                 jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error(
                     'failed to calculate RW for jediTaskID={0}'.format(
                         jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # get fullRWs
     fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(
         vo, prodSourceLabel, None, None)
     if fullRWs is None:
         tmpLog.error('failed to calculate full RW')
         return retTmpError
     # set metadata
     for jobSpec in jobSpecList:
         rwValues = allRwMap[jobSpec.currentPriority]
         jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
             jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap),
             str(fullRWs), str(tt2Map))
     tmpLog.debug('run task assigner for {0} tasks'.format(
         len(jobSpecList)))
     nBunchTask = 0
     while nBunchTask < len(jobSpecList):
         # get a bunch
         jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask]
         strIDs = 'jediTaskID='
         for tmpJobSpec in jobsBunch:
             strIDs += '{0},'.format(tmpJobSpec.taskID)
         strIDs = strIDs[:-1]
         tmpLog.debug(strIDs)
         # increment index
         nBunchTask += maxBunchTask
         # run task brokerage
         stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
         tmpLog.debug('{0}:{1}'.format(stS, str(outSs)))
     # return
     tmpLog.debug('done')
     return self.SC_SUCCEEDED
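The metadata field assembled above is a positional, semicolon-separated pack: the original processingType followed by five stringified maps (RW per priority, expected RW per task, priorities, full RWs, processing types). A toy illustration of the layout, with invented values:

# Invented sample values; the real maps are keyed by priority / taskID
# exactly as built in the loop above.
metadata = "%s;%s;%s;%s;%s;%s" % ('simul', str({800: 1.2}), str({123: 3.4}),
                                  str({123: 800}), str({800: 5.6}), str({123: 'simul'}))
print metadata.split(';')[0]    # 'simul', the original processingType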
Ejemplo n.º 43
    cloud = sys.argv[2]
    prodDBlock = sys.argv[3]
    inputFile = sys.argv[4]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile: None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-15.3.1'
    job.homepackage = 'AtlasProduction/15.3.1.5'
    job.transformation = 'csc_atlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite = site
    job.prodDBlock = prodDBlock

    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc4-gcc34-opt'
Ejemplo n.º 44
    def defineEvgen16Job(self, i):
        """Define an Evgen16 job based on predefined values and randomly generated names
        """

        job = JobSpec()
        job.computingSite = self.__site
        job.cloud = self.__cloud

        job.jobDefinitionID = int(time.time()) % 10000
        job.jobName = "%s_%d" % (uuid.uuid1(), i)
        job.AtlasRelease = 'Atlas-16.6.2'
        job.homepackage = 'AtlasProduction/16.6.2.1'
        job.transformation = 'Evgen_trf.py'
        job.destinationDBlock = self.__datasetName
        job.destinationSE = self.__destName
        job.currentPriority = 10000
        job.prodSourceLabel = 'test'
        job.cmtConfig = 'i686-slc5-gcc43-opt'

        #Output file
        fileO = FileSpec()
        fileO.lfn = "%s.evgen.pool.root" % job.jobName
        fileO.destinationDBlock = job.destinationDBlock
        fileO.destinationSE = job.destinationSE
        fileO.dataset = job.destinationDBlock
        fileO.destinationDBlockToken = 'ATLASDATADISK'
        fileO.type = 'output'
        job.addFile(fileO)

        #Log file
        fileL = FileSpec()
        fileL.lfn = "%s.job.log.tgz" % job.jobName
        fileL.destinationDBlock = job.destinationDBlock
        fileL.destinationSE = job.destinationSE
        fileL.dataset = job.destinationDBlock
        fileL.destinationDBlockToken = 'ATLASDATADISK'
        fileL.type = 'log'
        job.addFile(fileL)

        job.jobParameters = "2760 105048 19901 101 200 MC10.105048.PythiaB_ccmu3mu1X.py %s NONE NONE NONE MC10JobOpts-latest-test.tar.gz" % fileO.lfn
        return job
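A sketch of how a test driver might use this helper, assuming an instance tester of the enclosing class (whose __site, __cloud, __datasetName and __destName fields are set elsewhere) and the usual Client.submitJobs call; both names are assumptions here:

import userinterface.Client as Client

# Hypothetical driver: build three Evgen16 jobs and submit them together.
jobs = [tester.defineEvgen16Job(i) for i in range(3)]
s, o = Client.submitJobs(jobs)
for x in o:
    print "PandaID=%s" % x[0]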
Ejemplo n.º 45
    cloud      = sys.argv[2]
    prodDBlock = sys.argv[3]
    inputFile  = sys.argv[4]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile:None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-17.0.5'
    job.homepackage       = 'AtlasProduction/17.0.5.6'
    job.transformation    = 'AtlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite     = site
    job.prodDBlock        = prodDBlock
    
    job.prodSourceLabel   = 'test'
    job.processingType    = 'test'
    job.currentPriority   = 10000
    job.cloud             = cloud
    job.cmtConfig         = 'i686-slc5-gcc43-opt'
Ejemplo n.º 46
 def run(self):
     try:
         self.putLog('start %s' % self.evpFileName)
         # lock evp file
         self.evpFile = open(self.evpFileName)
         try:
             fcntl.flock(self.evpFile.fileno(),
                         fcntl.LOCK_EX | fcntl.LOCK_NB)
         except:
             # release
             self.putLog("cannot lock %s" % self.evpFileName)
             self.evpFile.close()
             return True
         # options
         runEvtList = []
         eventPickDataType = ''
         eventPickStreamName = ''
         eventPickDS = []
         eventPickAmiTag = ''
         eventPickNumSites = 1
         inputFileList = []
         tagDsList = []
         tagQuery = ''
         tagStreamRef = ''
         skipDaTRI = False
         runEvtGuidMap = {}
         # read evp file
         for tmpLine in self.evpFile:
             tmpMatch = re.search('^([^=]+)=(.+)$', tmpLine)
             # check format
             if tmpMatch is None:
                 continue
             tmpItems = tmpMatch.groups()
             if tmpItems[0] == 'runEvent':
                 # get run and event number
                 tmpRunEvt = tmpItems[1].split(',')
                 if len(tmpRunEvt) == 2:
                     runEvtList.append(tmpRunEvt)
             elif tmpItems[0] == 'eventPickDataType':
                 # data type
                 eventPickDataType = tmpItems[1]
             elif tmpItems[0] == 'eventPickStreamName':
                 # stream name
                 eventPickStreamName = tmpItems[1]
             elif tmpItems[0] == 'eventPickDS':
                 # dataset pattern
                 eventPickDS = tmpItems[1].split(',')
             elif tmpItems[0] == 'eventPickAmiTag':
                 # AMI tag
                 eventPickAmiTag = tmpItems[1]
             elif tmpItems[0] == 'eventPickNumSites':
                 # the number of sites where datasets are distributed
                 try:
                     eventPickNumSites = int(tmpItems[1])
                 except:
                     pass
             elif tmpItems[0] == 'userName':
                 # user name
                 self.userDN = tmpItems[1]
                 self.putLog("user=%s" % self.userDN)
             elif tmpItems[0] == 'userTaskName':
                 # user task name
                 self.userTaskName = tmpItems[1]
             elif tmpItems[0] == 'userDatasetName':
                 # user dataset name
                 self.userDatasetName = tmpItems[1]
             elif tmpItems[0] == 'lockedBy':
                 # client name
                 self.lockedBy = tmpItems[1]
             elif tmpItems[0] == 'creationTime':
                 # creation time
                 self.creationTime = tmpItems[1]
             elif tmpItems[0] == 'params':
                 # parameters
                 self.params = tmpItems[1]
             elif tmpItems[0] == 'inputFileList':
                 # input file list
                 inputFileList = tmpItems[1].split(',')
                 try:
                     inputFileList.remove('')
                 except:
                     pass
             elif tmpItems[0] == 'tagDS':
                 # TAG dataset
                 tagDsList = tmpItems[1].split(',')
             elif tmpItems[0] == 'tagQuery':
                 # query for TAG
                 tagQuery = tmpItems[1]
             elif tmpItems[0] == 'tagStreamRef':
                 # StreamRef for TAG
                 tagStreamRef = tmpItems[1]
                 if not tagStreamRef.endswith('_ref'):
                     tagStreamRef += '_ref'
             elif tmpItems[0] == 'runEvtGuidMap':
                 # GUIDs
                 try:
                     exec "runEvtGuidMap=" + tmpItems[1]
                 except:
                     pass
         # extract task name
         if self.userTaskName == '' and self.params != '':
             try:
                 tmpMatch = re.search('--outDS(=| ) *([^ ]+)', self.params)
                 if tmpMatch is not None:
                     self.userTaskName = tmpMatch.group(2)
                     if not self.userTaskName.endswith('/'):
                         self.userTaskName += '/'
             except:
                 pass
         # suppress DaTRI
         if self.params != '':
             if '--eventPickSkipDaTRI' in self.params:
                 skipDaTRI = True
         # get compact user name
         compactDN = self.taskBuffer.cleanUserID(self.userDN)
         # get jediTaskID
         self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI(
             compactDN, self.userTaskName)
         # convert
         if tagDsList == [] or tagQuery == '':
             # convert run/event list to dataset/file list
             tmpRet, locationMap, allFiles = self.pd2p.convertEvtRunToDatasets(
                 runEvtList, eventPickDataType, eventPickStreamName,
                 eventPickDS, eventPickAmiTag, self.userDN, runEvtGuidMap)
             if not tmpRet:
                 if 'isFatal' in locationMap and locationMap['isFatal'] is True:
                     self.ignoreError = False
                 self.endWithError(
                     'Failed to convert the run/event list to a dataset/file list'
                 )
                 return False
         else:
             # get parent dataset/files with TAG
             tmpRet, locationMap, allFiles = self.pd2p.getTagParentInfoUsingTagQuery(
                 tagDsList, tagQuery, tagStreamRef)
             if not tmpRet:
                 self.endWithError(
                     'Failed to get parent dataset/file list with TAG')
                 return False
         # use only files in the list
         if inputFileList != []:
             tmpAllFiles = []
             for tmpFile in allFiles:
                 if tmpFile['lfn'] in inputFileList:
                     tmpAllFiles.append(tmpFile)
             allFiles = tmpAllFiles
         # remove redundant CN from DN
         tmpDN = self.userDN
         tmpDN = re.sub('/CN=limited proxy', '', tmpDN)
         tmpDN = re.sub('(/CN=proxy)+$', '', tmpDN)
         # make dataset container
         tmpRet = self.pd2p.registerDatasetContainerWithDatasets(
             self.userDatasetName,
             allFiles,
             locationMap,
             nSites=eventPickNumSites,
             owner=tmpDN)
         if not tmpRet:
             self.endWithError('Failed to make a dataset container %s' %
                               self.userDatasetName)
             return False
         # skip DaTRI
         if skipDaTRI:
             # successfully terminated
             self.putLog("skip DaTRI")
             # update task
             self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID)
         else:
             # get candidates
             tmpRet, candidateMaps = self.pd2p.getCandidates(
                 self.userDatasetName, checkUsedFile=False, useHidden=True)
             if not tmpRet:
                 self.endWithError(
                     'Failed to find candidate for destination')
                 return False
             # collect all candidates
             allCandidates = []
             for tmpDS, tmpDsVal in candidateMaps.iteritems():
                 for tmpCloud, tmpCloudVal in tmpDsVal.iteritems():
                     for tmpSiteName in tmpCloudVal[0]:
                         if tmpSiteName not in allCandidates:
                             allCandidates.append(tmpSiteName)
             if allCandidates == []:
                 self.endWithError('No candidate for destination')
                 return False
             # get list of dataset (container) names
             if eventPickNumSites > 1:
                 # decompose container to transfer datasets separately
                 tmpRet, tmpOut = self.pd2p.getListDatasetReplicasInContainer(
                     self.userDatasetName)
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' %
                                       self.userDatasetName)
                     return False
                 userDatasetNameList = tmpOut.keys()
             else:
                 # transfer container at once
                 userDatasetNameList = [self.userDatasetName]
             # loop over all datasets
             sitesUsed = []
             for tmpUserDatasetName in userDatasetNameList:
                 # get size of dataset container
                 tmpRet, totalInputSize = rucioAPI.getDatasetSize(
                     tmpUserDatasetName)
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' %
                                       tmpUserDatasetName)
                     return False
                 # run brokerage
                 tmpJob = JobSpec()
                 tmpJob.AtlasRelease = ''
                 self.putLog("run brokerage for %s" % tmpUserDatasetName)
                 brokerage.broker.schedule([tmpJob],
                                           self.taskBuffer,
                                           self.siteMapper,
                                           True,
                                           allCandidates,
                                           True,
                                           datasetSize=totalInputSize)
                 if tmpJob.computingSite.startswith('ERROR'):
                     self.endWithError('brokerage failed with %s' %
                                       tmpJob.computingSite)
                     return False
                 self.putLog("site -> %s" % tmpJob.computingSite)
                 # send transfer request
                 try:
                     tmpDN = rucioAPI.parse_dn(tmpDN)
                     tmpStatus, userInfo = rucioAPI.finger(tmpDN)
                     if not tmpStatus:
                         raise RuntimeError, 'user info not found for {0} with {1}'.format(
                             tmpDN, userInfo)
                     tmpDN = userInfo['nickname']
                     tmpDQ2ID = self.siteMapper.getSite(
                         tmpJob.computingSite).ddm
                     tmpMsg = "%s ds=%s site=%s id=%s" % (
                         'registerDatasetLocation for DaTRI ',
                         tmpUserDatasetName, tmpDQ2ID, tmpDN)
                     self.putLog(tmpMsg)
                     rucioAPI.registerDatasetLocation(
                         tmpUserDatasetName, [tmpDQ2ID],
                         lifetime=14,
                         owner=tmpDN,
                         activity="User Subscriptions")
                     self.putLog('OK')
                 except:
                     errType, errValue = sys.exc_info()[:2]
                     tmpStr = 'Failed to send transfer request : %s %s' % (
                         errType, errValue)
                     tmpStr = tmpStr.strip()
                     tmpStr += traceback.format_exc()
                     self.endWithError(tmpStr)
                     return False
                 # list of sites already used
                 sitesUsed.append(tmpJob.computingSite)
                 self.putLog("used %s sites" % len(sitesUsed))
                 # set candidates
                 if len(sitesUsed) >= eventPickNumSites:
                     # reset candidates to limit the number of sites
                     allCandidates = sitesUsed
                     sitesUsed = []
                 else:
                     # remove site
                     allCandidates.remove(tmpJob.computingSite)
             # send email notification for success
             tmpMsg = 'A transfer request was successfully sent to Rucio.\n'
             tmpMsg += 'Your task will get started once transfer is completed.'
             self.sendEmail(True, tmpMsg)
         try:
             # unlock and delete evp file
             fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_UN)
             self.evpFile.close()
             os.remove(self.evpFileName)
         except:
             pass
         # successfully terminated
         self.putLog("end %s" % self.evpFileName)
         return True
     except:
         errType, errValue = sys.exc_info()[:2]
         self.endWithError('Got exception %s:%s %s' %
                           (errType, errValue, traceback.format_exc()))
         return False
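The reader above accepts one key=value pair per line and silently ignores anything that does not match. A hypothetical evp file exercising the main keys (all values invented for illustration):

runEvent=167776,1234567
runEvent=167776,7654321
eventPickDataType=AOD
eventPickStreamName=physics_Muons
eventPickDS=data11_7TeV.%.physics_Muons.merge.AOD.%
eventPickAmiTag=f396_m946
eventPickNumSites=2
userName=/DC=ch/DC=cern/OU=Users/CN=someuser
userTaskName=user.someuser.pickTest/
lockedBy=pathena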
Ejemplo n.º 47
  print 'Cloud not known: %s' % cloud
  cloud = None
  files={'EVNT.012303._00545.pool.root.1':'rod.cloudtest1'}

# UK
#'mc12.005802.JF17_pythia_jet_filter.evgen.EVNT.v12003105_tid004541._01035.pool.root.1':'mc12.005802.JF17_pythia_jet_filter.evgen.EVNT.v12003105_tid004541',
# CA
#    'EVNT.012303._00901.pool.root.1':'mc12.005001.pythia_minbias.evgen.EVNT.v12000701_tid012303',



jobList = []

for i in range(1):
  for lfn in files.keys():
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = commands.getoutput('uuidgen') 
    job.AtlasRelease      = 'Atlas-12.0.7'
    job.homepackage       = 'AtlasProduction/12.0.7.1'
# Need different args too
#    job.AtlasRelease      = 'Atlas-13.0.30'
#    job.homepackage       = 'AtlasProduction/13.0.30.2'
    job.transformation    = 'csc_simul_trf.py'
    job.destinationDBlock = datasetName
    job.cloud = cloud
    job.computingSite     = site
#    job.prodDBlock        = 'mc12.005001.pythia_minbias.evgen.EVNT.v12000701_tid012303'
    job.prodDBlock        = files[lfn]
    job.prodSourceLabel   = 'test'
#    job.prodSourceLabel   = 'cloudtest'
Ejemplo n.º 48
    def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.'))
        
        try:
            assert self.outsite
        except:
            logger.error("outsite not set. Aborting")
            raise Exception()
        
        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud
        

        # now just filling the job from AthenaMC data
        
        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')  
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        
        if app.transform_archive:
            jspec.homepackage       = 'AnalysisTransforms'+app.transform_archive
        elif app.prod_release:
            jspec.homepackage       = 'AnalysisTransforms-AtlasProduction_'+str(app.prod_release)
        jspec.transformation    = '%s/runAthena-00-00-11' % Client.baseURLSUB
            
        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock    = 'NULL'
        if job.inputdata and len(app.inputfiles)>0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock    = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset=""
        for type in ["EVNT","RDO","HITS","AOD","ESD","NTUP"]:
            if type in app.outputpaths.keys():
                outdset=string.replace(app.outputpaths[type],"/",".")
                outdset=outdset[1:-1]
                break
        if not outdset:
            try:
                assert len(app.outputpaths.keys())>0
            except:
                logger.error("app.outputpaths is empty: check your output datasets")
                raise
            type=app.outputpaths.keys()[0]
            outdset=string.replace(app.outputpaths[type],"/",".")
            outdset=outdset[1:-1]
            
        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel   = 'user'
        jspec.assignedPriority  = 1000
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite     = self.outsite
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
#       library (source files)
        flib = FileSpec()
        flib.lfn            = self.library
#        flib.GUID           = 
        flib.type           = 'input'
#        flib.status         = 
        flib.dataset        = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid=app.turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid=app.dbturls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid=app.minbias_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid=app.cavern_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
            

#       output files( this includes the logfiles)
        # Output files
        jidtag=""
        job = app._getParent() # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id       
        outfiles=app.subjobsOutfiles[job.id]
        pandaOutfiles={}
        for type in outfiles.keys():
            pandaOutfiles[type]=outfiles[type]+"."+str(jidtag)
            if type=="LOG":
                pandaOutfiles[type]+=".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset=string.replace(app.outputpaths[outtype],"/",".")
            dset=dset[1:-1]
            fout.dataset=dset
            fout.lfn=pandaOutfiles[outtype]
            fout.type              = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE    = jspec.destinationSE
            if outtype=='LOG':
                fout.type='log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE     = job.backend.site
            jspec.addFile(fout)


        #       job parameters
        param =  '-l %s ' % self.library # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '
        
        # transform parameters
        # need to update arglist with final output file name...
        newArgs=[]
        if app.mode == "evgen":
            app.args[3]=app.args[3]+" -t "
            if app.verbosity:
                app.args[3]=app.args[3]+" -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type])>-1:
                    arg=arg.replace(outfiles[type],pandaOutfiles[type])

            newArgs.append(arg)
        arglist=string.join(newArgs," ")
#        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles=app.inputfiles+app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles)>0:
            param+= ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles)>0:
            param+= ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        del pandaOutfiles["LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore...
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL        
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLSSL)
        if matchURL is not None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"


        jspec.jobParameters = param
        jspec.metadata="--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return
        
        return jspec
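The four input-file loops in the handler above (primary, db, minbias, cavern) differ only in the LFN list and the TURL map they consult; a small helper would remove the duplication. A sketch of that refactor (a suggestion, not code from the original handler):

def _addInputFiles(jspec, lfns, turls, dsetmap):
    # One input FileSpec per LFN; the GUID is carried in the TURL map
    # as 'guid:<GUID>', exactly as in the loops above.
    for lfn in lfns:
        finp = FileSpec()
        finp.lfn = lfn
        finp.GUID = turls[lfn].replace("guid:", "")
        finp.dataset = dsetmap[lfn]
        finp.prodDBlock = dsetmap[lfn]
        finp.prodDBlockToken = 'local'
        finp.dispatchDBlock = dsetmap[lfn]
        finp.type = 'input'
        finp.status = 'ready'
        jspec.addFile(finp)

# _addInputFiles(jspec, app.inputfiles,  app.turls,         app.dsetmap)
# _addInputFiles(jspec, app.dbfiles,     app.dbturls,       app.dsetmap)
# _addInputFiles(jspec, app.mbfiles,     app.minbias_turls, app.dsetmap)
# _addInputFiles(jspec, app.cavernfiles, app.cavern_turls,  app.dsetmap)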
Ejemplo n.º 49
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

site = sys.argv[1]
cloud = sys.argv[2]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = None

jobList = []

for i in range(1):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    job.AtlasRelease = 'Atlas-15.6.10'
    job.homepackage = 'AtlasProduction/15.6.10.1'
    job.transformation = 'Evgen_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 10000
    job.prodSourceLabel = 'test'
    job.computingSite = site
    job.cloud = cloud
    job.cmtConfig = 'i686-slc5-gcc43-opt'

    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % job.jobName
Ejemplo n.º 50
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the specific aspec of each subjob.
           Returns: subjobconfig list of objects understood by backends."""

        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
        from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs
        
        # make sure we have the correct siteType
        refreshPandaSpecs()

        job = app._getParent()
        masterjob = job._getRoot()

        logger.debug('ProdTransPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        job.backend.actualCE = job.backend.site
        job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud']

        # check that the site is in a submit-able status
        if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
            allowed_sites = job.backend.list_ddm_sites()

        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
            tmpDsExist = False
            if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')):
                #if Client.getDatasets(job.outputdata.datasetname):
                if getDatasets(job.outputdata.datasetname):
                    tmpDsExist = True
                    logger.info('Re-using output dataset %s'%job.outputdata.datasetname)
            if not configPanda['specialHandling']=='ddm:rucio' and not  configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'):
                Client.addDataset(job.outputdata.datasetname,False,location=outDsLocation,allowProdDisk=True,dsExist=tmpDsExist)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in adding dataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))
        
        # JobSpec.
        jspec = JobSpec()
        jspec.currentPriority = app.priority
        jspec.jobDefinitionID = masterjob.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.coreCount = app.core_count
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
        jspec.homepackage = app.home_package
        jspec.transformation = app.transformation
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = job.backend.site
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        if app.prod_source_label:
            jspec.prodSourceLabel = app.prod_source_label
        else:
            jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.specialHandling = configPanda['specialHandling']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.cmtConfig = app.atlas_cmtconfig
        if app.dbrelease == 'LATEST':
            try:
                latest_dbrelease = getLatestDBReleaseCaching()
            except:
                from pandatools import Client
                latest_dbrelease = Client.getLatestDBRelease()
            m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
            if m:
                self.dbrelease_dataset = m.group(1)
                self.dbrelease = m.group(2)
            else:
                raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually.")
        else:
            self.dbrelease_dataset = app.dbrelease_dataset
            self.dbrelease = app.dbrelease
        jspec.jobParameters = app.job_parameters

        if self.dbrelease:
            if self.dbrelease == 'current':
                jspec.jobParameters += ' --DBRelease=current' 
            else:
                if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                    jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                else:
                    jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                dbspec = FileSpec()
                dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
                dbspec.dataset = self.dbrelease_dataset
                dbspec.prodDBlock = jspec.prodDBlock
                dbspec.type = 'input'
                jspec.addFile(dbspec)

        if job.inputdata:
            m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)',
                          job.inputdata.dataset[0])
            if not m:
                logger.error("Error retrieving run number from dataset name")
                #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
                runnumber = 105200
            else:
                runnumber = int(m.group(2))
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --runNumber %d' % runnumber
            else:
                jspec.jobParameters += ' RunNumber=%d' % runnumber
        
        # Output files.
        randomized_lfns = []
        ilfn = 0
        for lfn, lfntype in zip(app.output_files,app.output_type):
            ofspec = FileSpec()
            if app.randomize_lfns:
                randomized_lfn = lfn + ('.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4] ) )
            else:
                randomized_lfn = lfn
            ofspec.lfn = randomized_lfn
            randomized_lfns.append(randomized_lfn)
            ofspec.destinationDBlock = jspec.destinationDBlock
            ofspec.destinationSE = jspec.destinationSE
            ofspec.dataset = jspec.destinationDBlock
            ofspec.type = 'output'
            jspec.addFile(ofspec)
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --output%sFile %s' % (lfntype, randomized_lfns[ilfn])
            else:
                jspec.jobParameters += ' output%sFile=%s' % (lfntype, randomized_lfns[ilfn])
            ilfn=ilfn+1

        # Input files.
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
                ifspec = FileSpec()
                ifspec.lfn = lfn
                ifspec.GUID = guid
                ifspec.fsize = size
                ifspec.md5sum = checksum
                ifspec.scope = scope
                ifspec.dataset = jspec.prodDBlock
                ifspec.prodDBlock = jspec.prodDBlock
                ifspec.type = 'input'
                jspec.addFile(ifspec)
            if app.input_type:
                itype = app.input_type
            else:
                itype = m.group(5)
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(job.inputdata.names))
            else:
                jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(job.inputdata.names))

        # Log files.
        lfspec = FileSpec()
        lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
        lfspec.destinationDBlock = jspec.destinationDBlock
        lfspec.destinationSE  = jspec.destinationSE
        lfspec.dataset = jspec.destinationDBlock
        lfspec.type = 'log'
        jspec.addFile(lfspec)
        
        return jspec
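The LATEST branch above depends on the dataset:file naming convention for DBRelease tarballs. A quick check of that regex against a plausible value (the dataset name is made up):

import re

latest_dbrelease = 'ddo.000001.Atlas.Ideal.DBRelease:DBRelease-23.3.1.tar.gz'
m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
print m.group(1)   # ddo.000001.Atlas.Ideal.DBRelease  -> dbrelease_dataset
print m.group(2)   # 23.3.1                            -> dbrelease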
Ejemplo n.º 51
 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue,
                 resource_name):
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retFatal = self.SC_FATAL
     retTmpError = self.SC_FAILED
     tmpLog.debug(
         'vo={0} label={1} queue={2} resource_name={3} nTasks={4}'.format(
             vo, prodSourceLabel, workQueue.queue_name, resource_name,
             len(inputList)))
     # loop over all tasks
     allRwMap = {}
     prioMap = {}
     tt2Map = {}
     expRWs = {}
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec, inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = 'managed'
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (
                 taskSpec.getOutDiskSize() +
                 taskSpec.getWorkDiskSize()) // 1024 // 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(
                         datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if setProdDBlock is False and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(
                 jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error(
                     'failed to calculate RW for jediTaskID={0}'.format(
                         jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList != []:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
                 jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap),
                 str(fullRWs), str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(
             len(jobSpecList)))
         nBunchTask = 0
         while nBunchTask < len(jobSpecList):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask]
             strIDs = 'jediTaskID='
             for tmpJobSpec in jobsBunch:
                 strIDs += '{0},'.format(tmpJobSpec.taskID)
             strIDs = strIDs[:-1]
             tmpLog.debug(strIDs)
             # increment index
             nBunchTask += maxBunchTask
             # run task brokerage
             stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS, str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority
         for taskSpec, inputChunk in inputListWorld:
             if taskSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     taskSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for iWorker in range(4):
             thr = AtlasProdTaskBrokerThread(inputListWorld, threadPool,
                                             self.taskBufferIF, ddmIF,
                                             fullRWs, liveCounter,
                                             workQueue)
             thr.start()
         threadPool.join(60 * 10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED
Ejemplo n.º 52
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'BNL_ATLAS_2'

files = {
    'EVNT.019128._00011.pool.root.1': None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-13.0.40'
    job.homepackage = 'AtlasProduction/13.0.40.3'
    job.transformation = 'csc_simul_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.computingSite = site
    job.prodDBlock = 'valid1.005001.pythia_minbias.evgen.EVNT.e306_tid019128'

    job.prodSourceLabel = 'test'
    job.currentPriority = 10000
    job.cloud = 'IT'

    fileI = FileSpec()
Ejemplo n.º 53
    def createJobSpec(self, task, outdataset, job, jobset, jobdef, site, jobname, lfnhanger, allsites, jobid):
        """Create a spec for one job

        :arg TaskWorker.DataObject.Task task: the task to work on
        :arg str outdataset: the output dataset name where all the produced files will be placed
        :arg WMCore.DataStructs.Job job: the abstract job
        :arg int jobset: the PanDA jobset corresponding to the current task
        :arg int jobdef: the PanDA jobdef where to append the current jobs --- not used
        :arg str site: the brokered site where the jobs will run
        :arg str jobname: the job name
        :arg str lfnhanger: the random string to be added in the output file name
        :arg list str allsites: all possible sites where the job can potentially run
        :arg int jobid: incremental job number
        :return: the spec object."""

        pandajob = JobSpec()
        ## always setting a job definition ID
        pandajob.jobDefinitionID = jobdef if jobdef else -1
        ## always setting a job set ID
        pandajob.jobsetID = jobset if jobset else -1
        pandajob.jobName = jobname
        pandajob.prodUserID = task['tm_user_dn']
        pandajob.destinationDBlock = outdataset
        pandajob.prodDBlock = task['tm_input_dataset']
        pandajob.prodSourceLabel = 'user'
        pandajob.computingSite = site
        pandajob.cloud = getSite(pandajob.computingSite)
        pandajob.destinationSE = 'local'
        pandajob.transformation = task['tm_transformation']
        ## need to initialize this
        pandajob.metadata = ''

        def outFileSpec(of=None, log=False):
            """Local routine to create a FileSpec for a job output/log file

               :arg str of: output file base name
               :return: FileSpec object for the output file."""
            outfile = FileSpec()
            if log:
                outfile.lfn = "job.log_%d_%s.tgz" % (jobid, lfnhanger)
                outfile.type = 'log'
            else:
                outfile.lfn = '%s_%d_%s%s' %(os.path.splitext(of)[0], jobid, lfnhanger, os.path.splitext(of)[1])
                outfile.type = 'output'
            outfile.destinationDBlock = pandajob.destinationDBlock
            outfile.destinationSE = task['tm_asyncdest']
            outfile.dataset = pandajob.destinationDBlock
            return outfile

        alloutfiles = []
        outjobpar = {}
        outfilestring = ''
        for outputfile in task['tm_outfiles']:
            outfilestring += '%s,' % outputfile
            filespec = outFileSpec(outputfile)
            alloutfiles.append(filespec)
            #pandajob.addFile(filespec)
            outjobpar[outputfile] = filespec.lfn
        for outputfile in task['tm_tfile_outfiles']:
            outfilestring += '%s,' % outputfile
            filespec = outFileSpec(outputfile)
            alloutfiles.append(filespec)
            #pandajob.addFile(filespec)
            outjobpar[outputfile] = filespec.lfn
        for outputfile in task['tm_edm_outfiles']:
            outfilestring += '%s,' % outputfile
            filespec = outFileSpec(outputfile)
            alloutfiles.append(filespec)
            #pandajob.addFile(filespec)
            outjobpar[outputfile] = filespec.lfn
        outfilestring = outfilestring[:-1]

        infiles = []
        for inputfile in job['input_files']:
            infiles.append( inputfile['lfn'] )

        pandajob.jobParameters = '-a %s ' % task['tm_user_sandbox']
        pandajob.jobParameters += '--sourceURL %s ' % task['tm_cache_url']
        pandajob.jobParameters += '--jobNumber=%s ' % jobid
        pandajob.jobParameters += '--cmsswVersion=%s ' % task['tm_job_sw']
        pandajob.jobParameters += '--scramArch=%s ' % task['tm_job_arch']
        pandajob.jobParameters += '--inputFile=\'%s\' ' % json.dumps(infiles)

        self.jobParametersSetting(pandajob, job, self.jobtypeMapper[task['tm_job_type']])

        pandajob.jobParameters += '-o "%s" ' % str(outjobpar)
        pandajob.jobParameters += '--dbs_url=%s ' % task['tm_dbs_url']
        pandajob.jobParameters += '--publish_dbs_url=%s ' % task['tm_publish_dbs_url']
        pandajob.jobParameters += '--publishFiles=%s ' % ('True' if task['tm_publication'] == 'T' else 'False')
        pandajob.jobParameters += '--saveLogs=%s ' % ('True' if task['tm_save_logs'] == 'T' else 'False')
        pandajob.jobParameters += '--availableSites=\'%s\' ' %json.dumps(allsites)

        pandajob.jobParameters += '--group=%s ' % (task['tm_user_group'] if task['tm_user_group'] else '')
        pandajob.jobParameters += '--role=%s ' % (task['tm_user_role'] if task['tm_user_role'] else '')

        self.logger.info(type(task['tm_user_infiles']))
        self.logger.info(task['tm_user_infiles'])

        if task['tm_user_infiles']:
            addinfilestring = ''
            for addinfile in task['tm_user_infiles']:
                addinfilestring += '%s,' % addinfile
            pandajob.jobParameters += '--userFiles=%s ' % ( addinfilestring[:-1] )

        pandajob.jobName = '%s' % task['tm_taskname'] #Needed by ASO and Dashboard

        if 'panda_oldjobid' in job and job['panda_oldjobid']:
            pandajob.parentID = job['panda_oldjobid']

        pandajob.addFile(outFileSpec(log=True))
        for filetoadd in alloutfiles:
            pandajob.addFile(filetoadd)

        return pandajob
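outFileSpec above splices the job number and the random lfnhanger into the stored LFN just before the extension. A quick illustration of the renaming (file name and hanger invented):

import os

of, jobid, lfnhanger = 'histograms.root', 3, 'ab12cd34'
lfn = '%s_%d_%s%s' % (os.path.splitext(of)[0], jobid, lfnhanger,
                      os.path.splitext(of)[1])
print lfn   # histograms_3_ab12cd34.root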
Ejemplo n.º 54
    def master_prepare(self,app,appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.')) 

        # set chirp variables
        if configPanda['chirpconfig'] or configPanda['chirpserver']:
            setChirpVariables()

#       Pack inputsandbox
        inputsandbox = 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null') 
        inpw = job.getInputWorkspace()
        # add user script to inputsandbox
        if hasattr(job.application.exe, "name"):
            if not job.application.exe in job.inputsandbox:
                job.inputsandbox.append(job.application.exe)

        for fname in [f.name for f in job.inputsandbox]:
            fname = fname.rstrip(os.sep)  # drop any trailing separator before splitting
            path = fname[:fname.rfind(os.sep)]
            f = fname[fname.rfind(os.sep)+1:]
            rc, output = commands.getstatusoutput('tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
        if len(job.inputsandbox) > 0:
            rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox)))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
            inputsandbox += ".gz"
        else:
            inputsandbox = None

#       Upload Inputsandbox
        if inputsandbox:
            logger.debug('Uploading source tarball ...')
            uploadSources(inpw.getPath(),os.path.basename(inputsandbox))
            self.inputsandbox = inputsandbox
        else:
            self.inputsandbox = None

#       input dataset
        if job.inputdata:
            if job.inputdata._name != 'DQ2Dataset':
                raise ApplicationConfigurationError('PANDA application supports only DQ2Datasets')

        # run brokerage here if not splitting
        if not job.splitter:
            from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
            runPandaBrokerage(job)
        elif job.splitter._name not in ['DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask']:
            raise ApplicationConfigurationError('Panda splitter must be DQ2JobSplitter or ArgSplitter')
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError('site is still AUTO after brokerage!')

#       output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        job.outputdata.datasetname,outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)

        self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

        try:
            Client.addDataset(job.outputdata.datasetname,False,location=self.outDsLocation)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,self.outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))

        # handle the libds
        if job.backend.libds:
            self.libDataset = job.backend.libds
            self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
            self.library = self.fileBO.lfn
        elif job.backend.bexec:
            self.libDataset = job.outputdata.datasetname+'.lib'
            self.library = '%s.tgz' % self.libDataset
            try:
                Client.addDataset(self.libDataset,False,location=self.outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, location=self.outDsLocation)
                logger.info('Lib dataset %s registered at %s'%(self.libDataset,self.outDsLocation))
            except exceptions.SystemExit:
                raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(self.libDataset,sys.exc_info()[0],sys.exc_info()[1]))

        # collect extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.outputsandbox:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # create build job
        if job.backend.bexec != '':
            jspec = JobSpec()
            jspec.jobDefinitionID   = job.id
            jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
            jspec.transformation    = '%s/buildGen-00-00-01' % Client.baseURLSUB
            if Client.isDQ2free(job.backend.site):
                jspec.destinationDBlock = '%s/%s' % (job.outputdata.datasetname,self.libDataset)
                jspec.destinationSE     = 'local'
            else:
                jspec.destinationDBlock = self.libDataset
                jspec.destinationSE     = job.backend.site
            jspec.prodSourceLabel   = configPanda['prodSourceLabelBuild']
            jspec.processingType    = configPanda['processingType']
            jspec.assignedPriority  = configPanda['assignedPriorityBuild']
            jspec.computingSite     = job.backend.site
            jspec.cloud             = job.backend.requirements.cloud
            jspec.jobParameters     = '-o %s' % (self.library)
            if self.inputsandbox:
                jspec.jobParameters     += ' -i %s' % (self.inputsandbox)
            else:
                raise ApplicationConfigurationError('Executable on Panda with build job defined, but inputsandbox is empty!')
            matchURL = re.search('(http.*://[^/]+)/',Client.baseURLCSRVSSL)
            if matchURL:
                jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1)
            if job.backend.bexec != '':
                jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec)
                jspec.jobParameters += ' -r %s ' % '.'
                

            fout = FileSpec()
            fout.lfn  = self.library
            fout.type = 'output'
            fout.dataset = self.libDataset
            fout.destinationDBlock = self.libDataset
            jspec.addFile(fout)

            flog = FileSpec()
            flog.lfn = '%s.log.tgz' % self.libDataset
            flog.type = 'log'
            flog.dataset = self.libDataset
            flog.destinationDBlock = self.libDataset
            jspec.addFile(flog)
            return jspec
        else:
            return None
Example No. 55

import sys
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

site = sys.argv[1]
cloud = sys.argv[2]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = None

jobList = []

for i in range(1):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    job.AtlasRelease = 'Atlas-17.0.5'
    job.homepackage = 'AtlasProduction/17.0.5.6'
    job.transformation = 'Evgen_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 10000
    job.prodSourceLabel = 'test'
    job.computingSite = site
    job.cloud = cloud
    job.cmtConfig = 'i686-slc5-gcc43-opt'

    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % job.jobName
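    # the original snippet is truncated at this point; the lines below are a
    # hedged completion following the standard PanDA test-script pattern used
    # elsewhere in this collection (output FileSpec, log FileSpec, submission)
    file.destinationDBlock = job.destinationDBlock
    file.destinationSE = job.destinationSE
    file.dataset = job.destinationDBlock
    file.type = 'output'
    job.addFile(file)

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'
    job.addFile(fileOL)

    jobList.append(job)

# submitJobs returns a status code and, per job, a tuple starting with PandaID
s, o = Client.submitJobs(jobList)
print(s)
for x in o:
    print("PandaID=%s" % x[0])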
Example No. 56

# NOTE: the opening of this snippet is missing in the original; the imports
# and the first argv branch below are a reconstruction inferred from the
# elif/else that follow and from the neighbouring examples
import sys
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) == 2:
    site = sys.argv[1]
    cloud = 'CA'
elif len(sys.argv) == 3:
    site = sys.argv[1]
    cloud = sys.argv[2]
else:
    site = None
    cloud = None
    
datasetName = 'panda.destDB.%s_tid999991' % commands.getoutput('uuidgen')
taskid = 999989

jobList = []

for i in range(1):
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),i)
#    job.AtlasRelease      = 'Atlas-12.0.6'
#    job.homepackage       = 'AtlasProduction/12.0.6.5'
    job.AtlasRelease      = 'Atlas-12.0.7'
    job.homepackage       = 'AtlasProduction/12.0.7.1'

    job.transformation    = 'csc_evgen_trf.py'
    job.destinationDBlock = datasetName
#    job.destinationSE     = destName
#    job.cloud             = 'CA'
    job.cloud             = cloud
    job.taskID = taskid
    job.currentPriority   = 1000
    job.prodSourceLabel   = 'test'
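    # hedged completion: the snippet is truncated in the original; a typical
    # continuation, mirroring the neighbouring examples, attaches a log file
    # and submits the job list
    job.computingSite     = site

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'
    job.addFile(fileOL)

    jobList.append(job)

s, o = Client.submitJobs(jobList)
print(s)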
Example No. 57
    def run(self):
        try:
            while True:
                _logger.debug('%s start' % self.pandaID)
                # query job
                job = self.taskBuffer.peekJobs([self.pandaID],fromDefined=False,
                                               fromArchived=False,fromWaiting=False)[0]
                # check job status; peekJobs can return None, so test that
                # before touching any job attributes
                if job is None:
                    _logger.debug('%s escape : not found' % self.pandaID)
                    return
                _logger.debug('%s in %s' % (self.pandaID, job.jobStatus))
                if job.jobStatus not in ['running','sent','starting','holding',
                                         'stagein','stageout']:
                    if job.jobStatus == 'transferring' and (job.prodSourceLabel in ['user','panda'] or job.jobSubStatus not in [None, 'NULL', '']):
                        pass
                    else:
                        _logger.debug('%s escape : %s' % (self.pandaID,job.jobStatus))
                        return
                # time limit
                timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=self.sleepTime)
                if job.modificationTime < timeLimit or (job.endTime != 'NULL' and job.endTime < timeLimit):
                    _logger.debug('%s %s lastmod:%s endtime:%s' % (job.PandaID,job.jobStatus,
                                                                   str(job.modificationTime),
                                                                   str(job.endTime)))
                    destDBList = []
                    if job.jobStatus == 'sent':
                        # sent job didn't receive reply from pilot within 30 min
                        job.jobDispatcherErrorCode = ErrorCode.EC_SendError
                        job.jobDispatcherErrorDiag = "Sent job didn't receive reply from pilot within 30 min"
                    elif job.exeErrorDiag == 'NULL' and job.pilotErrorDiag == 'NULL':
                        # lost heartbeat
                        job.jobDispatcherErrorCode = ErrorCode.EC_Watcher
                        if job.jobDispatcherErrorDiag == 'NULL':
                            if job.endTime == 'NULL':
                                # normal lost heartbeat
                                job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(job.modificationTime)
                            else:
                                # job recovery failed
                                job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(job.endTime)
                                if job.jobStatus == 'transferring':
                                    job.jobDispatcherErrorDiag += ' in transferring'
                            # get worker
                            workerSpecs = self.taskBuffer.getWorkersForJob(job.PandaID)
                            if len(workerSpecs) > 0:
                                workerSpec = workerSpecs[0]
                                if workerSpec.status in ['finished', 'failed', 'cancelled', 'missed']:
                                    job.supErrorCode = SupErrors.error_codes['WORKER_ALREADY_DONE']
                                    job.supErrorDiag = 'worker already {0} at {1} with {2}'.format(workerSpec.status, str(workerSpec.endTime),
                                                                                                   workerSpec.diagMessage)
                                    job.supErrorDiag = JobSpec.truncateStringAttr('supErrorDiag', job.supErrorDiag)
                    else:
                        # job recovery failed
                        job.jobDispatcherErrorCode = ErrorCode.EC_Recovery
                        job.jobDispatcherErrorDiag = 'job recovery failed for %s hours' % (self.sleepTime/60)
                    # set job status
                    job.jobStatus = 'failed'
                    # set endTime for lost heartbeat
                    if job.endTime == 'NULL':
                        # normal lost heartbeat
                        job.endTime = job.modificationTime
                    # set files status
                    for tmpFile in job.Files:
                        if tmpFile.type in ['output', 'log']:
                            tmpFile.status = 'failed'
                            if tmpFile.destinationDBlock not in destDBList:
                                destDBList.append(tmpFile.destinationDBlock)
                    # event service
                    if EventServiceUtils.isEventServiceJob(job) and not EventServiceUtils.isJobCloningJob(job):
                        eventStat = self.taskBuffer.getEventStat(job.jediTaskID, job.PandaID)
                        # set sub status when no successful events
                        if EventServiceUtils.ST_finished not in eventStat:
                            job.jobSubStatus = 'es_heartbeat'
                    # update job
                    self.taskBuffer.updateJobs([job],False)
                    # start closer
                    if job.jobStatus == 'failed':

                        source = 'jobDispatcherErrorCode'
                        error_code = job.jobDispatcherErrorCode
                        error_diag = job.jobDispatcherErrorDiag

                        try:
                            _logger.debug("Watcher will call apply_retrial_rules")
                            retryModule.apply_retrial_rules(self.taskBuffer, job.PandaID, source, error_code, error_diag, job.attemptNr)
                            _logger.debug("apply_retrial_rules is back")
                        except Exception as e:
                            _logger.debug("apply_retrial_rules excepted and needs to be investigated (%s): %s"%(e, traceback.format_exc()))

                        # updateJobs was successful and it failed a job with taskBufferErrorCode
                        try:

                            _logger.debug("Watcher.run will peek the job")
                            job_tmp = self.taskBuffer.peekJobs([job.PandaID], fromDefined=False, fromArchived=True,
                                                               fromWaiting=False)[0]
                            if job_tmp.taskBufferErrorCode:
                                source = 'taskBufferErrorCode'
                                error_code = job_tmp.taskBufferErrorCode
                                error_diag = job_tmp.taskBufferErrorDiag
                                _logger.debug("Watcher.run 2 will call apply_retrial_rules")
                                retryModule.apply_retrial_rules(self.taskBuffer, job_tmp.PandaID, source, error_code,
                                                                error_diag, job_tmp.attemptNr)
                                _logger.debug("apply_retrial_rules 2 is back")
                        except IndexError:
                            pass
                        except Exception as e:
                            _logger.error("apply_retrial_rules 2 excepted and needs to be investigated (%s): %s" % (e, traceback.format_exc()))

                        cThr = Closer(self.taskBuffer,destDBList,job)
                        cThr.start()
                        cThr.join()
                    _logger.debug('%s end' % job.PandaID)
                    return
                # single action
                if self.single:
                    return
                # sleep
                time.sleep(60*self.sleepTime)
        except Exception:
            errType, errValue, _ = sys.exc_info()
            _logger.error("run() : %s %s" % (errType, errValue))
            return