def createJobSpec(nodes, walltime, command, jobName, outputFile=None): transformation = '#json#' datasetName = 'panda.destDB.%s' % subprocess.check_output('uuidgen') destName = 'local' prodSourceLabel = 'user' currentPriority = 1000 job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = jobName job.VO = VO job.transformation = transformation job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = currentPriority job.prodSourceLabel = prodSourceLabel job.computingSite = QUEUE_NAME job.cmtConfig = json.dumps({'name': job.jobName, 'next': None}) lqcd_command = { "nodes": nodes, "walltime": walltime, "name": job.jobName, "command": command } if (outputFile): lqcd_command['outputFile'] = outputFile job.jobParameters = json.dumps(lqcd_command) fileOL = FileSpec() fileOL.lfn = "%s.job.log.tgz" % job.jobName.strip() fileOL.destinationDBlock = job.destinationDBlock fileOL.destinationSE = job.destinationSE fileOL.dataset = job.destinationDBlock fileOL.type = 'log' job.addFile(fileOL) #job.cmtConfig = None return job
def defineEvgen16Job(self, i): """Define an Evgen16 job based on predefined values and randomly generated names """ job = JobSpec() job.computingSite = self.__site job.cloud = self.__cloud job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (uuid.uuid1(), i) job.AtlasRelease = 'Atlas-16.6.2' job.homepackage = 'AtlasProduction/16.6.2.1' job.transformation = 'Evgen_trf.py' job.destinationDBlock = self.__datasetName job.destinationSE = self.__destName job.currentPriority = 10000 job.prodSourceLabel = 'test' job.cmtConfig = 'i686-slc5-gcc43-opt' #Output file fileO = FileSpec() fileO.lfn = "%s.evgen.pool.root" % job.jobName fileO.destinationDBlock = job.destinationDBlock fileO.destinationSE = job.destinationSE fileO.dataset = job.destinationDBlock fileO.destinationDBlockToken = 'ATLASDATADISK' fileO.type = 'output' job.addFile(fileO) #Log file fileL = FileSpec() fileL.lfn = "%s.job.log.tgz" % job.jobName fileL.destinationDBlock = job.destinationDBlock fileL.destinationSE = job.destinationSE fileL.dataset = job.destinationDBlock fileL.destinationDBlockToken = 'ATLASDATADISK' fileL.type = 'log' job.addFile(fileL) job.jobParameters = "2760 105048 19901 101 200 MC10.105048.PythiaB_ccmu3mu1X.py %s NONE NONE NONE MC10JobOpts-latest-test.tar.gz" % fileO.lfn return job
def createJob(self, name, nodes, walltime, command, inputs=None, queuename=None): job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s" % commands.getoutput('uuidgen') job.VO = self.vo job.transformation = self.transformation job.destinationDBlock = self.datasetName job.destinationSE = self.destName job.currentPriority = self.currentPriority job.prodSourceLabel = self.prodSourceLabel job.computingSite = self.site if queuename is None else queuename lqcd_command = { "nodes": nodes, "walltime": walltime, "name": name, "command": command } job.jobParameters = json.dumps(lqcd_command) fileOL = FileSpec() fileOL.lfn = "%s.job.log.tgz" % job.jobName fileOL.destinationDBlock = job.destinationDBlock fileOL.destinationSE = job.destinationSE fileOL.dataset = job.destinationDBlock fileOL.type = 'log' job.addFile(fileOL) job.cmtConfig = inputs return job
def master_prepare(self,app,appmasterconfig): # PandaTools from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaMCPandaRTHandler master_prepare called for %s', job.getFQID('.')) usertag = configDQ2['usertag'] #usertag='user09' nickname = getNickname(allowMissingNickname=True) self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (usertag,nickname,commands.getoutput('hostname').split('.')[0],int(time.time()),job.id) # self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity()) sources = 'sources.%s.tar.gz' % commands.getoutput('uuidgen 2> /dev/null') self.library = '%s.lib.tgz' % self.libDataset # check DBRelease # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1: # raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'") # unpack library logger.debug('Creating source tarball ...') tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null') os.mkdir(tmpdir) inputbox=[] if os.path.exists(app.transform_archive): # must add a condition on size. inputbox += [ File(app.transform_archive) ] if app.evgen_job_option: self.evgen_job_option=app.evgen_job_option if os.path.exists(app.evgen_job_option): # locally modified job option file to add to the input sand box inputbox += [ File(app.evgen_job_option) ] self.evgen_job_option=app.evgen_job_option.split("/")[-1] # add input sandbox files if (job.inputsandbox): for file in job.inputsandbox: inputbox += [ file ] # add option files for extFile in job.backend.extOutFile: try: shutil.copy(extFile,tmpdir) except IOError: os.makedirs(tmpdir) shutil.copy(extFile,tmpdir) # fill the archive for opt_file in inputbox: try: shutil.copy(opt_file.name,tmpdir) except IOError: os.makedirs(tmpdir) shutil.copy(opt_file.name,tmpdir) # now tar it up again inpw = job.getInputWorkspace() rc, output = commands.getstatusoutput('tar czf %s -C %s .' % (inpw.getPath(sources),tmpdir)) if rc: logger.error('Packing sources failed with status %d',rc) logger.error(output) raise ApplicationConfigurationError(None,'Packing sources failed.') shutil.rmtree(tmpdir) # upload sources logger.debug('Uploading source tarball ...') try: cwd = os.getcwd() os.chdir(inpw.getPath()) rc, output = Client.putFile(sources) if output != 'True': logger.error('Uploading sources %s failed. Status = %d', sources, rc) logger.error(output) raise ApplicationConfigurationError(None,'Uploading archive failed') finally: os.chdir(cwd) # Use Panda's brokerage ## if job.inputdata and len(app.sites)>0: ## # update cloud, use inputdata's ## from dq2.info.TiersOfATLAS import whichCloud,ToACache ## inclouds=[] ## for site in app.sites: ## cloudSite=whichCloud(app.sites[0]) ## if cloudSite not in inclouds: ## inclouds.append(cloudSite) ## # now converting inclouds content into proper brokering stuff. ## outclouds=[] ## for cloudSite in inclouds: ## for cloudID, eachCloud in ToACache.dbcloud.iteritems(): ## if cloudSite==eachCloud: ## cloud=cloudID ## outclouds.append(cloud) ## break ## print outclouds ## # finally, matching with user's wishes ## if len(outclouds)>0: ## if not job.backend.requirements.cloud: # no user wish, update ## job.backend.requirements.cloud=outclouds[0] ## else: ## try: ## assert job.backend.requirements.cloud in outclouds ## except: ## raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds))) from GangaPanda.Lib.Panda.Panda import runPandaBrokerage runPandaBrokerage(job) if job.backend.site == 'AUTO': raise ApplicationConfigurationError(None,'site is still AUTO after brokerage!') # output dataset preparation and registration try: outDsLocation = Client.PandaSites[job.backend.site]['ddm'] except: raise ApplicationConfigurationError(None,"Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site) if not app.dryrun: for outtype in app.outputpaths.keys(): dset=string.replace(app.outputpaths[outtype],"/",".") dset=dset[1:] # dataset registration must be done only once. print "registering output dataset %s at %s" % (dset,outDsLocation) try: Client.addDataset(dset,False,location=outDsLocation) dq2_set_dataset_lifetime(dset, location=outDsLocation) except: raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % dset) # extend registration to build job lib dataset: print "registering output dataset %s at %s" % (self.libDataset,outDsLocation) try: Client.addDataset(self.libDataset,False,location=outDsLocation) dq2_set_dataset_lifetime(self.libDataset, outDsLocation) except: raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % self.libDataset) ### cacheVer = "-AtlasProduction_" + str(app.prod_release) logger.debug("master job submit?") self.outsite=job.backend.site if app.se_name and app.se_name != "none" and not self.outsite: self.outsite=app.se_name # create build job jspec = JobSpec() jspec.jobDefinitionID = job.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel jspec.homepackage = 'AnalysisTransforms'+cacheVer#+nightVer jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare() jspec.destinationDBlock = self.libDataset jspec.destinationSE = job.backend.site jspec.prodSourceLabel = 'panda' jspec.assignedPriority = 2000 jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud # jspec.jobParameters = self.args not known yet jspec.jobParameters = '-o %s' % (self.library) if app.userarea: print app.userarea jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea)) else: jspec.jobParameters += ' -i %s' % (sources) jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel) matchURL = re.search('(http.*://[^/]+)/',Client.baseURLSSL) if matchURL: jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1) fout = FileSpec() fout.lfn = self.library fout.type = 'output' fout.dataset = self.libDataset fout.destinationDBlock = self.libDataset jspec.addFile(fout) flog = FileSpec() flog.lfn = '%s.log.tgz' % self.libDataset flog.type = 'log' flog.dataset = self.libDataset flog.destinationDBlock = self.libDataset jspec.addFile(flog) #print "MASTER JOB DETAILS:",jspec.jobParameters return jspec
def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): '''prepare the subjob specific configuration''' # PandaTools from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.')) try: assert self.outsite except: logger.error("outsite not set. Aborting") raise Exception() job.backend.site = self.outsite job.backend.actualCE = self.outsite cloud = job._getRoot().backend.requirements.cloud job.backend.requirements.cloud = cloud # now just filling the job from AthenaMC data jspec = JobSpec() jspec.jobDefinitionID = job._getRoot().id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel if app.transform_archive: jspec.homepackage = 'AnalysisTransforms'+app.transform_archive elif app.prod_release: jspec.homepackage = 'AnalysisTransforms-AtlasProduction_'+str(app.prod_release) jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB #---->???? prodDBlock and destinationDBlock when facing several input / output datasets? jspec.prodDBlock = 'NULL' if job.inputdata and len(app.inputfiles)>0 and app.inputfiles[0] in app.dsetmap: jspec.prodDBlock = app.dsetmap[app.inputfiles[0]] # How to specify jspec.destinationDBlock when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset outdset="" for type in ["EVNT","RDO","HITS","AOD","ESD","NTUP"]: if type in app.outputpaths.keys(): outdset=string.replace(app.outputpaths[type],"/",".") outdset=outdset[1:-1] break if not outdset: try: assert len(app.outputpaths.keys())>0 except: logger.error("app.outputpaths is empty: check your output datasets") raise type=app.outputpaths.keys()[0] outdset=string.replace(app.outputpaths[type],"/",".") outdset=outdset[1:-1] jspec.destinationDBlock = outdset jspec.destinationSE = self.outsite jspec.prodSourceLabel = 'user' jspec.assignedPriority = 1000 jspec.cloud = cloud # memory if job.backend.requirements.memory != -1: jspec.minRamCount = job.backend.requirements.memory jspec.computingSite = self.outsite jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel) # library (source files) flib = FileSpec() flib.lfn = self.library # flib.GUID = flib.type = 'input' # flib.status = flib.dataset = self.libDataset flib.dispatchDBlock = self.libDataset jspec.addFile(flib) # input files FIXME: many more input types for lfn in app.inputfiles: useguid=app.turls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # add dbfiles if any: for lfn in app.dbfiles: useguid=app.dbturls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # then minbias files for lfn in app.mbfiles: useguid=app.minbias_turls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # then cavern files for lfn in app.cavernfiles: useguid=app.cavern_turls[lfn].replace("guid:","") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # output files( this includes the logfiles) # Output files jidtag="" job = app._getParent() # Returns job or subjob object if job._getRoot().subjobs: jidtag = job._getRoot().id else: jidtag = "%d" % job.id outfiles=app.subjobsOutfiles[job.id] pandaOutfiles={} for type in outfiles.keys(): pandaOutfiles[type]=outfiles[type]+"."+str(jidtag) if type=="LOG": pandaOutfiles[type]+=".tgz" #print pandaOutfiles for outtype in pandaOutfiles.keys(): fout = FileSpec() dset=string.replace(app.outputpaths[outtype],"/",".") dset=dset[1:-1] fout.dataset=dset fout.lfn=pandaOutfiles[outtype] fout.type = 'output' # fout.destinationDBlock = jspec.destinationDBlock fout.destinationDBlock = fout.dataset fout.destinationSE = jspec.destinationSE if outtype=='LOG': fout.type='log' fout.destinationDBlock = fout.dataset fout.destinationSE = job.backend.site jspec.addFile(fout) # job parameters param = '-l %s ' % self.library # user tarball. # use corruption checker if job.backend.requirements.corCheck: param += '--corCheck ' # disable to skip missing files if job.backend.requirements.notSkipMissing: param += '--notSkipMissing ' # transform parameters # need to update arglist with final output file name... newArgs=[] if app.mode == "evgen": app.args[3]=app.args[3]+" -t " if app.verbosity: app.args[3]=app.args[3]+" -l %s " % app.verbosity for arg in app.args[3:]: for type in outfiles.keys(): if arg.find(outfiles[type])>-1: arg=arg.replace(outfiles[type],pandaOutfiles[type]) newArgs.append(arg) arglist=string.join(newArgs," ") # print "Arglist:",arglist param += ' -r ./ ' param += ' -j "%s"' % urllib.quote(arglist) allinfiles=app.inputfiles+app.dbfiles # Input files. param += ' -i "%s" ' % allinfiles if len(app.mbfiles)>0: param+= ' -m "%s" ' % app.mbfiles if len(app.cavernfiles)>0: param+= ' -n "%s" ' % app.cavernfiles # param += '-m "[]" ' #%minList FIXME # param += '-n "[]" ' #%cavList FIXME del pandaOutfiles["LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore... param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items()) # source URL matchURL = re.search("(http.*://[^/]+)/",Client.baseURLSSL) if matchURL != None: param += " --sourceURL %s " % matchURL.group(1) param += " --trf" jspec.jobParameters = param jspec.metadata="--trf \"%s\"" % arglist #print "SUBJOB DETAILS:",jspec.values() if app.dryrun: print "job.application.dryrun activated, printing out job parameters" print jspec.values() return return jspec
def run(self): try: # get job tmpJobs = self.taskBuffer.getFullJobStatus([self.rPandaID]) if tmpJobs == [] or tmpJobs[0] == None: _logger.debug("cannot find job for PandaID=%s" % self.rPandaID) return self.job = tmpJobs[0] _logger.debug("%s start %s:%s:%s" % (self.token,self.job.jobDefinitionID,self.job.prodUserName,self.job.computingSite)) # using output container if not self.job.destinationDBlock.endswith('/'): _logger.debug("%s ouput dataset container is required" % self.token) _logger.debug("%s end" % self.token) return # FIXEME : dont' touch group jobs for now if self.job.destinationDBlock.startswith('group') and (not self.userRequest): _logger.debug("%s skip group jobs" % self.token) _logger.debug("%s end" % self.token) return # check processingType typesForRebro = ['pathena','prun','ganga','ganga-rbtest'] if not self.job.processingType in typesForRebro: _logger.debug("%s skip processingType=%s not in %s" % \ (self.token,self.job.processingType,str(typesForRebro))) _logger.debug("%s end" % self.token) return # check jobsetID if self.job.jobsetID in [0,'NULL',None]: _logger.debug("%s jobsetID is undefined" % self.token) _logger.debug("%s end" % self.token) return # check metadata if self.job.metadata in [None,'NULL']: _logger.debug("%s metadata is unavailable" % self.token) _logger.debug("%s end" % self.token) return # check --disableRebrokerage match = re.search("--disableRebrokerage",self.job.metadata) if match != None and (not self.simulation) and (not self.forceOpt) \ and (not self.userRequest): _logger.debug("%s diabled rebrokerage" % self.token) _logger.debug("%s end" % self.token) return # check --site match = re.search("--site",self.job.metadata) if match != None and (not self.simulation) and (not self.forceOpt) \ and (not self.userRequest): _logger.debug("%s --site is used" % self.token) _logger.debug("%s end" % self.token) return # check --libDS match = re.search("--libDS",self.job.metadata) if match != None: _logger.debug("%s --libDS is used" % self.token) _logger.debug("%s end" % self.token) return # check --workingGroup since it is site-specific match = re.search("--workingGroup",self.job.metadata) if match != None: _logger.debug("%s workingGroup is specified" % self.token) _logger.debug("%s end" % self.token) return # avoid too many rebrokerage if not self.checkRev(): _logger.debug("%s avoid too many rebrokerage" % self.token) _logger.debug("%s end" % self.token) return # check if multiple JobIDs use the same libDS if self.bPandaID != None and self.buildStatus not in ['finished','failed']: if self.minPandaIDlibDS == None or self.maxPandaIDlibDS == None: _logger.debug("%s max/min PandaIDs are unavailable for the libDS" % self.token) _logger.debug("%s end" % self.token) return tmpPandaIDsForLibDS = self.taskBuffer.getFullJobStatus([self.minPandaIDlibDS,self.maxPandaIDlibDS]) if len(tmpPandaIDsForLibDS) != 2 or tmpPandaIDsForLibDS[0] == None or tmpPandaIDsForLibDS[1] == None: _logger.debug("%s failed to get max/min PandaIDs for the libDS" % self.token) _logger.debug("%s end" % self.token) return # check if tmpPandaIDsForLibDS[0].jobDefinitionID != tmpPandaIDsForLibDS[1].jobDefinitionID: _logger.debug("%s multiple JobIDs use the libDS %s:%s %s:%s" % (self.token,tmpPandaIDsForLibDS[0].jobDefinitionID, self.minPandaIDlibDS,tmpPandaIDsForLibDS[1].jobDefinitionID, self.maxPandaIDlibDS)) _logger.debug("%s end" % self.token) return # check excludedSite if self.excludedSite == None: self.excludedSite = [] match = re.search("--excludedSite( +|=)\s*(\'|\")*([^ \"\';$]+)",self.job.metadata) if match != None: self.excludedSite = match.group(3).split(',') # remove empty try: self.excludedSite.remove('') except: pass _logger.debug("%s excludedSite=%s" % (self.token,str(self.excludedSite))) # check cloud if self.cloud == None: match = re.search("--cloud( +|=)\s*(\'|\")*([^ \"\';$]+)",self.job.metadata) if match != None: self.cloud = match.group(3) _logger.debug("%s cloud=%s" % (self.token,self.cloud)) # get inDS/LFNs status,tmpMapInDS,maxFileSize = self.taskBuffer.getInDatasetsForReBrokerage(self.jobID,self.userName) if not status: # failed _logger.error("%s failed to get inDS/LFN from DB" % self.token) return status,inputDS = self.getListDatasetsUsedByJob(tmpMapInDS) if not status: # failed _logger.error("%s failed" % self.token) return # get relicas replicaMap = {} unknownSites = {} for tmpDS in inputDS: if tmpDS.endswith('/'): # container status,tmpRepMaps = self.getListDatasetReplicasInContainer(tmpDS) else: # normal dataset status,tmpRepMap = self.getListDatasetReplicas(tmpDS) tmpRepMaps = {tmpDS:tmpRepMap} if not status: # failed _logger.debug("%s failed" % self.token) return # make map per site for tmpDS,tmpRepMap in tmpRepMaps.iteritems(): for tmpSite,tmpStat in tmpRepMap.iteritems(): # ignore special sites if tmpSite in ['CERN-PROD_TZERO','CERN-PROD_DAQ','CERN-PROD_TMPDISK']: continue # ignore tape sites if tmpSite.endswith('TAPE'): continue # keep sites with unknown replica info if tmpStat[-1]['found'] == None: if not unknownSites.has_key(tmpDS): unknownSites[tmpDS] = [] unknownSites[tmpDS].append(tmpSite) # ignore ToBeDeleted if tmpStat[-1]['archived'] in ['ToBeDeleted',]: continue # change EOS if tmpSite.startswith('CERN-PROD_EOS'): tmpSite = 'CERN-PROD_EOS' # change EOS TMP if tmpSite.startswith('CERN-PROD_TMP'): tmpSite = 'CERN-PROD_TMP' # change DISK to SCRATCHDISK tmpSite = re.sub('_[^_-]+DISK$','',tmpSite) # change PERF-XYZ to SCRATCHDISK tmpSite = re.sub('_PERF-[^_-]+$','',tmpSite) # change PHYS-XYZ to SCRATCHDISK tmpSite = re.sub('_PHYS-[^_-]+$','',tmpSite) # patch for BNLPANDA if tmpSite in ['BNLPANDA']: tmpSite = 'BNL-OSG2' # add to map if not replicaMap.has_key(tmpSite): replicaMap[tmpSite] = {} replicaMap[tmpSite][tmpDS] = tmpStat[-1] _logger.debug("%s replica map -> %s" % (self.token,str(replicaMap))) # refresh replica info in needed self.refreshReplicaInfo(unknownSites) # instantiate SiteMapper siteMapper = SiteMapper(self.taskBuffer) # get original DDM origSiteDDM = self.getAggName(siteMapper.getSite(self.job.computingSite).ddm) # check all datasets maxDQ2Sites = [] if inputDS != []: # loop over all sites for tmpSite,tmpDsVal in replicaMap.iteritems(): # loop over all datasets appendFlag = True for tmpOrigDS in inputDS: # check completeness if tmpDsVal.has_key(tmpOrigDS) and tmpDsVal[tmpOrigDS]['found'] != None and \ tmpDsVal[tmpOrigDS]['total'] == tmpDsVal[tmpOrigDS]['found']: pass else: appendFlag = False # append if appendFlag: if not tmpSite in maxDQ2Sites: maxDQ2Sites.append(tmpSite) _logger.debug("%s candidate DQ2s -> %s" % (self.token,str(maxDQ2Sites))) if inputDS != [] and maxDQ2Sites == []: _logger.debug("%s no DQ2 candidate" % self.token) else: maxPandaSites = [] # original maxinputsize origMaxInputSize = siteMapper.getSite(self.job.computingSite).maxinputsize # look for Panda siteIDs for tmpSiteID,tmpSiteSpec in siteMapper.siteSpecList.iteritems(): # use ANALY_ only if not tmpSiteID.startswith('ANALY_'): continue # remove test and local if re.search('_test',tmpSiteID,re.I) != None: continue if re.search('_local',tmpSiteID,re.I) != None: continue # avoid same site if self.avoidSameSite and self.getAggName(tmpSiteSpec.ddm) == origSiteDDM: continue # check DQ2 ID if self.cloud in [None,tmpSiteSpec.cloud] \ and (self.getAggName(tmpSiteSpec.ddm) in maxDQ2Sites or inputDS == []): # excluded sites excludedFlag = False for tmpExcSite in self.excludedSite: if re.search(tmpExcSite,tmpSiteID) != None: excludedFlag = True break if excludedFlag: _logger.debug("%s skip %s since excluded" % (self.token,tmpSiteID)) continue # use online only if tmpSiteSpec.status != 'online': _logger.debug("%s skip %s status=%s" % (self.token,tmpSiteID,tmpSiteSpec.status)) continue # check maxinputsize if (maxFileSize == None and origMaxInputSize > siteMapper.getSite(tmpSiteID).maxinputsize) or \ maxFileSize > siteMapper.getSite(tmpSiteID).maxinputsize: _logger.debug("%s skip %s due to maxinputsize" % (self.token,tmpSiteID)) continue # append if not tmpSiteID in maxPandaSites: maxPandaSites.append(tmpSiteID) # choose at most 20 sites randomly to avoid too many lookup random.shuffle(maxPandaSites) maxPandaSites = maxPandaSites[:20] _logger.debug("%s candidate PandaSites -> %s" % (self.token,str(maxPandaSites))) # no Panda siteIDs if maxPandaSites == []: _logger.debug("%s no Panda site candidate" % self.token) else: # set AtlasRelease and cmtConfig to dummy job tmpJobForBrokerage = JobSpec() if self.job.AtlasRelease in ['NULL',None]: tmpJobForBrokerage.AtlasRelease = '' else: tmpJobForBrokerage.AtlasRelease = self.job.AtlasRelease # use nightlies matchNight = re.search('^AnalysisTransforms-.*_(rel_\d+)$',self.job.homepackage) if matchNight != None: tmpJobForBrokerage.AtlasRelease += ':%s' % matchNight.group(1) # use cache else: matchCache = re.search('^AnalysisTransforms-([^/]+)',self.job.homepackage) if matchCache != None: tmpJobForBrokerage.AtlasRelease = matchCache.group(1).replace('_','-') if not self.job.cmtConfig in ['NULL',None]: tmpJobForBrokerage.cmtConfig = self.job.cmtConfig # memory size if not self.job.minRamCount in ['NULL',None,0]: tmpJobForBrokerage.minRamCount = self.job.minRamCount # CPU count if not self.job.maxCpuCount in ['NULL',None,0]: tmpJobForBrokerage.maxCpuCount = self.job.maxCpuCount # run brokerage brokerage.broker.schedule([tmpJobForBrokerage],self.taskBuffer,siteMapper,forAnalysis=True, setScanSiteList=maxPandaSites,trustIS=True,reportLog=True) newSiteID = tmpJobForBrokerage.computingSite self.brokerageInfo += tmpJobForBrokerage.brokerageErrorDiag _logger.debug("%s runBrokerage - > %s" % (self.token,newSiteID)) # unknown site if not siteMapper.checkSite(newSiteID): _logger.error("%s unknown site" % self.token) _logger.debug("%s failed" % self.token) return # get new site spec newSiteSpec = siteMapper.getSite(newSiteID) # avoid repetition if self.getAggName(newSiteSpec.ddm) == origSiteDDM: _logger.debug("%s assigned to the same site %s " % (self.token,newSiteID)) _logger.debug("%s end" % self.token) return # simulation mode if self.simulation: _logger.debug("%s end simulation" % self.token) return # prepare jobs status = self.prepareJob(newSiteID,newSiteSpec) if status: # run SetUpper statusSetUp = self.runSetUpper() if not statusSetUp: _logger.debug("%s runSetUpper failed" % self.token) else: _logger.debug("%s successfully assigned to %s" % (self.token,newSiteID)) _logger.debug("%s end" % self.token) except: errType,errValue,errTraceBack = sys.exc_info() _logger.error("%s run() : %s %s" % (self.token,errType,errValue))
jobList = [] for i in range(1): job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i) job.AtlasRelease = 'Atlas-17.0.5' job.homepackage = 'AtlasProduction/17.0.5.6' job.transformation = 'Evgen_trf.py' job.destinationDBlock = datasetName job.destinationSE = destName job.currentPriority = 10000 job.prodSourceLabel = 'test' job.computingSite = site job.cloud = cloud job.cmtConfig = 'i686-slc5-gcc43-opt' file = FileSpec() file.lfn = "%s.evgen.pool.root" % job.jobName file.destinationDBlock = job.destinationDBlock file.destinationSE = job.destinationSE file.dataset = job.destinationDBlock file.destinationDBlockToken = 'ATLASDATADISK' file.type = 'output' job.addFile(file) fileOL = FileSpec() fileOL.lfn = "%s.job.log.tgz" % job.jobName fileOL.destinationDBlock = job.destinationDBlock fileOL.destinationSE = job.destinationSE fileOL.dataset = job.destinationDBlock
def master_prepare(self, app, appmasterconfig): # PandaTools from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaMCPandaRTHandler master_prepare called for %s', job.getFQID('.')) usertag = configDQ2['usertag'] #usertag='user09' nickname = getNickname(allowMissingNickname=True) self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % ( usertag, nickname, commands.getoutput('hostname').split('.')[0], int(time.time()), job.id) # self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity()) sources = 'sources.%s.tar.gz' % commands.getoutput( 'uuidgen 2> /dev/null') self.library = '%s.lib.tgz' % self.libDataset # check DBRelease # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1: # raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'") # unpack library logger.debug('Creating source tarball ...') tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null') os.mkdir(tmpdir) inputbox = [] if os.path.exists(app.transform_archive): # must add a condition on size. inputbox += [File(app.transform_archive)] if app.evgen_job_option: self.evgen_job_option = app.evgen_job_option if os.path.exists(app.evgen_job_option): # locally modified job option file to add to the input sand box inputbox += [File(app.evgen_job_option)] self.evgen_job_option = app.evgen_job_option.split("/")[-1] # add input sandbox files if (job.inputsandbox): for file in job.inputsandbox: inputbox += [file] # add option files for extFile in job.backend.extOutFile: try: shutil.copy(extFile, tmpdir) except IOError: os.makedirs(tmpdir) shutil.copy(extFile, tmpdir) # fill the archive for opt_file in inputbox: try: shutil.copy(opt_file.name, tmpdir) except IOError: os.makedirs(tmpdir) shutil.copy(opt_file.name, tmpdir) # now tar it up again inpw = job.getInputWorkspace() rc, output = commands.getstatusoutput('tar czf %s -C %s .' % (inpw.getPath(sources), tmpdir)) if rc: logger.error('Packing sources failed with status %d', rc) logger.error(output) raise ApplicationConfigurationError(None, 'Packing sources failed.') shutil.rmtree(tmpdir) # upload sources logger.debug('Uploading source tarball ...') try: cwd = os.getcwd() os.chdir(inpw.getPath()) rc, output = Client.putFile(sources) if output != 'True': logger.error('Uploading sources %s failed. Status = %d', sources, rc) logger.error(output) raise ApplicationConfigurationError( None, 'Uploading archive failed') finally: os.chdir(cwd) # Use Panda's brokerage ## if job.inputdata and len(app.sites)>0: ## # update cloud, use inputdata's ## from dq2.info.TiersOfATLAS import whichCloud,ToACache ## inclouds=[] ## for site in app.sites: ## cloudSite=whichCloud(app.sites[0]) ## if cloudSite not in inclouds: ## inclouds.append(cloudSite) ## # now converting inclouds content into proper brokering stuff. ## outclouds=[] ## for cloudSite in inclouds: ## for cloudID, eachCloud in ToACache.dbcloud.iteritems(): ## if cloudSite==eachCloud: ## cloud=cloudID ## outclouds.append(cloud) ## break ## print outclouds ## # finally, matching with user's wishes ## if len(outclouds)>0: ## if not job.backend.requirements.cloud: # no user wish, update ## job.backend.requirements.cloud=outclouds[0] ## else: ## try: ## assert job.backend.requirements.cloud in outclouds ## except: ## raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds))) from GangaPanda.Lib.Panda.Panda import runPandaBrokerage runPandaBrokerage(job) if job.backend.site == 'AUTO': raise ApplicationConfigurationError( None, 'site is still AUTO after brokerage!') # output dataset preparation and registration try: outDsLocation = Client.PandaSites[job.backend.site]['ddm'] except: raise ApplicationConfigurationError( None, "Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site) if not app.dryrun: for outtype in app.outputpaths.keys(): dset = string.replace(app.outputpaths[outtype], "/", ".") dset = dset[1:] # dataset registration must be done only once. print "registering output dataset %s at %s" % (dset, outDsLocation) try: Client.addDataset(dset, False, location=outDsLocation) dq2_set_dataset_lifetime(dset, location=outDsLocation) except: raise ApplicationConfigurationError( None, "Fail to create output dataset %s. Aborting" % dset) # extend registration to build job lib dataset: print "registering output dataset %s at %s" % (self.libDataset, outDsLocation) try: Client.addDataset(self.libDataset, False, location=outDsLocation) dq2_set_dataset_lifetime(self.libDataset, outDsLocation) except: raise ApplicationConfigurationError( None, "Fail to create output dataset %s. Aborting" % self.libDataset) ### cacheVer = "-AtlasProduction_" + str(app.prod_release) logger.debug("master job submit?") self.outsite = job.backend.site if app.se_name and app.se_name != "none" and not self.outsite: self.outsite = app.se_name # create build job jspec = JobSpec() jspec.jobDefinitionID = job.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel jspec.homepackage = 'AnalysisTransforms' + cacheVer #+nightVer jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare() jspec.destinationDBlock = self.libDataset jspec.destinationSE = job.backend.site jspec.prodSourceLabel = 'panda' jspec.assignedPriority = 2000 jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud # jspec.jobParameters = self.args not known yet jspec.jobParameters = '-o %s' % (self.library) if app.userarea: print app.userarea jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea)) else: jspec.jobParameters += ' -i %s' % (sources) jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel) matchURL = re.search('(http.*://[^/]+)/', Client.baseURLSSL) if matchURL: jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1) fout = FileSpec() fout.lfn = self.library fout.type = 'output' fout.dataset = self.libDataset fout.destinationDBlock = self.libDataset jspec.addFile(fout) flog = FileSpec() flog.lfn = '%s.log.tgz' % self.libDataset flog.type = 'log' flog.dataset = self.libDataset flog.destinationDBlock = self.libDataset jspec.addFile(flog) #print "MASTER JOB DETAILS:",jspec.jobParameters return jspec
index += 1 job = JobSpec() job.jobDefinitionID = (time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index) job.AtlasRelease = 'Atlas-15.3.1' job.homepackage = 'AtlasProduction/15.3.1.5' job.transformation = 'csc_atlasG4_trf.py' job.destinationDBlock = datasetName job.computingSite = site job.prodDBlock = prodDBlock job.prodSourceLabel = 'test' job.processingType = 'test' job.currentPriority = 10000 job.cloud = cloud job.cmtConfig = 'i686-slc4-gcc34-opt' fileI = FileSpec() fileI.dataset = job.prodDBlock fileI.prodDBlock = job.prodDBlock fileI.lfn = lfn fileI.type = 'input' job.addFile(fileI) fileD = FileSpec() fileD.dataset = 'ddo.000001.Atlas.Ideal.DBRelease.v070302' fileD.prodDBlock = fileD.dataset fileD.lfn = 'DBRelease-7.3.2.tar.gz' fileD.type = 'input' job.addFile(fileD)
job = JobSpec() job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'),index) job.AtlasRelease = 'Atlas-12.0.6' job.homepackage = 'AtlasProduction/12.0.6.4' job.transformation = 'csc_reco_trf.py' job.destinationDBlock = datasetName job.destinationSE = destName job.computingSite = site #job.prodDBlock = 'misal1_mc12.005200.T1_McAtNlo_Jimmy.digit.RDO.v12000601_tid007554' job.prodDBlock = 'misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610' job.cloud = 'US' job.prodSourceLabel = 'test' job.currentPriority = 10000 job.cmtConfig = 'i686-slc4-gcc34-opt' fileI = FileSpec() fileI.dataset = job.prodDBlock fileI.prodDBlock = job.prodDBlock fileI.lfn = lfn fileI.type = 'input' job.addFile(fileI) fileD = FileSpec() fileD.dataset = 'ddo.000001.Atlas.Ideal.DBRelease.v030101' fileD.prodDBlock = 'ddo.000001.Atlas.Ideal.DBRelease.v030101' fileD.lfn = 'DBRelease-3.1.1.tar.gz' fileD.type = 'input' job.addFile(fileD)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig): """Prepare the specific aspec of each subjob. Returns: subjobconfig list of objects understood by backends.""" from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs # make sure we have the correct siteType refreshPandaSpecs() job = app._getParent() masterjob = job._getRoot() logger.debug('ProdTransPandaRTHandler prepare called for %s', job.getFQID('.')) job.backend.actualCE = job.backend.site job.backend.requirements.cloud = Client.PandaSites[ job.backend.site]['cloud'] # check that the site is in a submit-able status if not job.splitter or job.splitter._name != 'DQ2JobSplitter': allowed_sites = job.backend.list_ddm_sites() try: outDsLocation = Client.PandaSites[job.backend.site]['ddm'] tmpDsExist = False if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')): #if Client.getDatasets(job.outputdata.datasetname): if getDatasets(job.outputdata.datasetname): tmpDsExist = True logger.info('Re-using output dataset %s' % job.outputdata.datasetname) if not configPanda[ 'specialHandling'] == 'ddm:rucio' and not configPanda[ 'processingType'].startswith( 'gangarobot' ) and not configPanda['processingType'].startswith( 'hammercloud') and not configPanda[ 'processingType'].startswith('rucio_test'): Client.addDataset(job.outputdata.datasetname, False, location=outDsLocation, allowProdDisk=True, dsExist=tmpDsExist) logger.info('Output dataset %s registered at %s' % (job.outputdata.datasetname, outDsLocation)) dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation) except exceptions.SystemExit: raise BackendError( 'Panda', 'Exception in adding dataset %s: %s %s' % (job.outputdata.datasetname, sys.exc_info()[0], sys.exc_info()[1])) # JobSpec. jspec = JobSpec() jspec.currentPriority = app.priority jspec.jobDefinitionID = masterjob.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.coreCount = app.core_count jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release jspec.homepackage = app.home_package jspec.transformation = app.transformation jspec.destinationDBlock = job.outputdata.datasetname if job.outputdata.location: jspec.destinationSE = job.outputdata.location else: jspec.destinationSE = job.backend.site if job.inputdata: jspec.prodDBlock = job.inputdata.dataset[0] else: jspec.prodDBlock = 'NULL' if app.prod_source_label: jspec.prodSourceLabel = app.prod_source_label else: jspec.prodSourceLabel = configPanda['prodSourceLabelRun'] jspec.processingType = configPanda['processingType'] jspec.specialHandling = configPanda['specialHandling'] jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud jspec.cmtConfig = app.atlas_cmtconfig if app.dbrelease == 'LATEST': try: latest_dbrelease = getLatestDBReleaseCaching() except: from pandatools import Client latest_dbrelease = Client.getLatestDBRelease() m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease) if m: self.dbrelease_dataset = m.group(1) self.dbrelease = m.group(2) else: raise ApplicationConfigurationError( None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually." ) else: self.dbrelease_dataset = app.dbrelease_dataset self.dbrelease = app.dbrelease jspec.jobParameters = app.job_parameters if self.dbrelease: if self.dbrelease == 'current': jspec.jobParameters += ' --DBRelease=current' else: if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % ( self.dbrelease, ) else: jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % ( self.dbrelease, ) dbspec = FileSpec() dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease dbspec.dataset = self.dbrelease_dataset dbspec.prodDBlock = jspec.prodDBlock dbspec.type = 'input' jspec.addFile(dbspec) if job.inputdata: m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)', job.inputdata.dataset[0]) if not m: logger.error("Error retrieving run number from dataset name") #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name") runnumber = 105200 else: runnumber = int(m.group(2)) if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --runNumber %d' % runnumber else: jspec.jobParameters += ' RunNumber=%d' % runnumber # Output files. randomized_lfns = [] ilfn = 0 for lfn, lfntype in zip(app.output_files, app.output_type): ofspec = FileSpec() if app.randomize_lfns: randomized_lfn = lfn + ( '.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4])) else: randomized_lfn = lfn ofspec.lfn = randomized_lfn randomized_lfns.append(randomized_lfn) ofspec.destinationDBlock = jspec.destinationDBlock ofspec.destinationSE = jspec.destinationSE ofspec.dataset = jspec.destinationDBlock ofspec.type = 'output' jspec.addFile(ofspec) if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --output%sFile %s' % ( lfntype, randomized_lfns[ilfn]) else: jspec.jobParameters += ' output%sFile=%s' % ( lfntype, randomized_lfns[ilfn]) ilfn = ilfn + 1 # Input files. if job.inputdata: for guid, lfn, size, checksum, scope in zip( job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes): ifspec = FileSpec() ifspec.lfn = lfn ifspec.GUID = guid ifspec.fsize = size ifspec.md5sum = checksum ifspec.scope = scope ifspec.dataset = jspec.prodDBlock ifspec.prodDBlock = jspec.prodDBlock ifspec.type = 'input' jspec.addFile(ifspec) if app.input_type: itype = app.input_type else: itype = m.group(5) if jspec.transformation.endswith( "_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join( job.inputdata.names)) else: jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join( job.inputdata.names)) # Log files. lfspec = FileSpec() lfspec.lfn = '%s.job.log.tgz' % jspec.jobName lfspec.destinationDBlock = jspec.destinationDBlock lfspec.destinationSE = jspec.destinationSE lfspec.dataset = jspec.destinationDBlock lfspec.type = 'log' jspec.addFile(lfspec) return jspec
job.jobDefinitionID = int(time.time()) % 10000 job.jobName = "%s_1" % commands.getoutput('uuidgen') job.AtlasRelease = 'Atlas-20.1.4' job.homepackage = 'AtlasProduction/20.1.4.14' #job.AtlasRelease = 'Atlas-20.20.8' #job.homepackage = 'AtlasProduction/20.20.8.4' job.transformation = 'Reco_tf.py' job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('uuidgen') job.destinationSE = 'AGLT2_TEST' job.prodDBlock = 'user.mlassnig:user.mlassnig.pilot.test.single.hits' job.currentPriority = 1000 #job.prodSourceLabel = 'ptest' job.prodSourceLabel = 'user' job.computingSite = site job.cloud = cloud job.cmtConfig = 'x86_64-slc6-gcc48-opt' job.specialHandling = 'ddm:rucio' #job.transferType = 'direct' ifile = 'HITS.06828093._000096.pool.root.1' fileI = FileSpec() fileI.GUID = 'AC5B3759-B606-BA42-8681-4BD86455AE02' fileI.checksum = 'ad:5d000974' fileI.dataset = 'user.mlassnig:user.mlassnig.pilot.test.single.hits' fileI.fsize = 94834717 fileI.lfn = ifile fileI.prodDBlock = job.prodDBlock fileI.scope = 'mc15_13TeV' fileI.type = 'input' job.addFile(fileI)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig): """Prepare the specific aspec of each subjob. Returns: subjobconfig list of objects understood by backends.""" from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs # make sure we have the correct siteType refreshPandaSpecs() job = app._getParent() masterjob = job._getRoot() logger.debug('ProdTransPandaRTHandler prepare called for %s', job.getFQID('.')) job.backend.actualCE = job.backend.site job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud'] # check that the site is in a submit-able status if not job.splitter or job.splitter._name != 'DQ2JobSplitter': allowed_sites = job.backend.list_ddm_sites() try: outDsLocation = Client.PandaSites[job.backend.site]['ddm'] tmpDsExist = False if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')): #if Client.getDatasets(job.outputdata.datasetname): if getDatasets(job.outputdata.datasetname): tmpDsExist = True logger.info('Re-using output dataset %s'%job.outputdata.datasetname) if not configPanda['specialHandling']=='ddm:rucio' and not configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'): Client.addDataset(job.outputdata.datasetname,False,location=outDsLocation,allowProdDisk=True,dsExist=tmpDsExist) logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,outDsLocation)) dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation) except exceptions.SystemExit: raise BackendError('Panda','Exception in adding dataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1])) # JobSpec. jspec = JobSpec() jspec.currentPriority = app.priority jspec.jobDefinitionID = masterjob.id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.coreCount = app.core_count jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release jspec.homepackage = app.home_package jspec.transformation = app.transformation jspec.destinationDBlock = job.outputdata.datasetname if job.outputdata.location: jspec.destinationSE = job.outputdata.location else: jspec.destinationSE = job.backend.site if job.inputdata: jspec.prodDBlock = job.inputdata.dataset[0] else: jspec.prodDBlock = 'NULL' if app.prod_source_label: jspec.prodSourceLabel = app.prod_source_label else: jspec.prodSourceLabel = configPanda['prodSourceLabelRun'] jspec.processingType = configPanda['processingType'] jspec.specialHandling = configPanda['specialHandling'] jspec.computingSite = job.backend.site jspec.cloud = job.backend.requirements.cloud jspec.cmtConfig = app.atlas_cmtconfig if app.dbrelease == 'LATEST': try: latest_dbrelease = getLatestDBReleaseCaching() except: from pandatools import Client latest_dbrelease = Client.getLatestDBRelease() m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease) if m: self.dbrelease_dataset = m.group(1) self.dbrelease = m.group(2) else: raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually.") else: self.dbrelease_dataset = app.dbrelease_dataset self.dbrelease = app.dbrelease jspec.jobParameters = app.job_parameters if self.dbrelease: if self.dbrelease == 'current': jspec.jobParameters += ' --DBRelease=current' else: if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,) else: jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,) dbspec = FileSpec() dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease dbspec.dataset = self.dbrelease_dataset dbspec.prodDBlock = jspec.prodDBlock dbspec.type = 'input' jspec.addFile(dbspec) if job.inputdata: m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)', job.inputdata.dataset[0]) if not m: logger.error("Error retrieving run number from dataset name") #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name") runnumber = 105200 else: runnumber = int(m.group(2)) if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --runNumber %d' % runnumber else: jspec.jobParameters += ' RunNumber=%d' % runnumber # Output files. randomized_lfns = [] ilfn = 0 for lfn, lfntype in zip(app.output_files,app.output_type): ofspec = FileSpec() if app.randomize_lfns: randomized_lfn = lfn + ('.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4] ) ) else: randomized_lfn = lfn ofspec.lfn = randomized_lfn randomized_lfns.append(randomized_lfn) ofspec.destinationDBlock = jspec.destinationDBlock ofspec.destinationSE = jspec.destinationSE ofspec.dataset = jspec.destinationDBlock ofspec.type = 'output' jspec.addFile(ofspec) if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --output%sFile %s' % (lfntype, randomized_lfns[ilfn]) else: jspec.jobParameters += ' output%sFile=%s' % (lfntype, randomized_lfns[ilfn]) ilfn=ilfn+1 # Input files. if job.inputdata: for guid, lfn, size, checksum, scope in zip(job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes): ifspec = FileSpec() ifspec.lfn = lfn ifspec.GUID = guid ifspec.fsize = size ifspec.md5sum = checksum ifspec.scope = scope ifspec.dataset = jspec.prodDBlock ifspec.prodDBlock = jspec.prodDBlock ifspec.type = 'input' jspec.addFile(ifspec) if app.input_type: itype = app.input_type else: itype = m.group(5) if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"): jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(job.inputdata.names)) else: jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(job.inputdata.names)) # Log files. lfspec = FileSpec() lfspec.lfn = '%s.job.log.tgz' % jspec.jobName lfspec.destinationDBlock = jspec.destinationDBlock lfspec.destinationSE = jspec.destinationSE lfspec.dataset = jspec.destinationDBlock lfspec.type = 'log' jspec.addFile(lfspec) return jspec
index += 1 job = JobSpec() job.jobDefinitionID = (time.time()) % 10000 job.jobName = "%s_%d" % (commands.getoutput('uuidgen'),index) job.AtlasRelease = 'Atlas-17.0.5' job.homepackage = 'AtlasProduction/17.0.5.6' job.transformation = 'AtlasG4_trf.py' job.destinationDBlock = datasetName job.computingSite = site job.prodDBlock = prodDBlock job.prodSourceLabel = 'test' job.processingType = 'test' job.currentPriority = 10000 job.cloud = cloud job.cmtConfig = 'i686-slc5-gcc43-opt' fileI = FileSpec() fileI.dataset = job.prodDBlock fileI.prodDBlock = job.prodDBlock fileI.lfn = lfn fileI.type = 'input' job.addFile(fileI) fileD = FileSpec() fileD.dataset = 'ddo.000001.Atlas.Ideal.DBRelease.v170602' fileD.prodDBlock = fileD.dataset fileD.lfn = 'DBRelease-17.6.2.tar.gz' fileD.type = 'input' job.addFile(fileD)
def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): '''prepare the subjob specific configuration''' # PandaTools from pandatools import Client from pandatools import AthenaUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec job = app._getParent() logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.')) try: assert self.outsite except: logger.error("outsite not set. Aborting") raise Exception() job.backend.site = self.outsite job.backend.actualCE = self.outsite cloud = job._getRoot().backend.requirements.cloud job.backend.requirements.cloud = cloud # now just filling the job from AthenaMC data jspec = JobSpec() jspec.jobDefinitionID = job._getRoot().id jspec.jobName = commands.getoutput('uuidgen 2> /dev/null') jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel if app.transform_archive: jspec.homepackage = 'AnalysisTransforms' + app.transform_archive elif app.prod_release: jspec.homepackage = 'AnalysisTransforms-AtlasProduction_' + str( app.prod_release) jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB #---->???? prodDBlock and destinationDBlock when facing several input / output datasets? jspec.prodDBlock = 'NULL' if job.inputdata and len( app.inputfiles) > 0 and app.inputfiles[0] in app.dsetmap: jspec.prodDBlock = app.dsetmap[app.inputfiles[0]] # How to specify jspec.destinationDBlock when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset outdset = "" for type in ["EVNT", "RDO", "HITS", "AOD", "ESD", "NTUP"]: if type in app.outputpaths.keys(): outdset = string.replace(app.outputpaths[type], "/", ".") outdset = outdset[1:-1] break if not outdset: try: assert len(app.outputpaths.keys()) > 0 except: logger.error( "app.outputpaths is empty: check your output datasets") raise type = app.outputpaths.keys()[0] outdset = string.replace(app.outputpaths[type], "/", ".") outdset = outdset[1:-1] jspec.destinationDBlock = outdset jspec.destinationSE = self.outsite jspec.prodSourceLabel = 'user' jspec.assignedPriority = 1000 jspec.cloud = cloud # memory if job.backend.requirements.memory != -1: jspec.minRamCount = job.backend.requirements.memory jspec.computingSite = self.outsite jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel) # library (source files) flib = FileSpec() flib.lfn = self.library # flib.GUID = flib.type = 'input' # flib.status = flib.dataset = self.libDataset flib.dispatchDBlock = self.libDataset jspec.addFile(flib) # input files FIXME: many more input types for lfn in app.inputfiles: useguid = app.turls[lfn].replace("guid:", "") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # add dbfiles if any: for lfn in app.dbfiles: useguid = app.dbturls[lfn].replace("guid:", "") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # then minbias files for lfn in app.mbfiles: useguid = app.minbias_turls[lfn].replace("guid:", "") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # then cavern files for lfn in app.cavernfiles: useguid = app.cavern_turls[lfn].replace("guid:", "") finp = FileSpec() finp.lfn = lfn finp.GUID = useguid finp.dataset = app.dsetmap[lfn] finp.prodDBlock = app.dsetmap[lfn] finp.prodDBlockToken = 'local' finp.dispatchDBlock = app.dsetmap[lfn] finp.type = 'input' finp.status = 'ready' jspec.addFile(finp) # output files( this includes the logfiles) # Output files jidtag = "" job = app._getParent() # Returns job or subjob object if job._getRoot().subjobs: jidtag = job._getRoot().id else: jidtag = "%d" % job.id outfiles = app.subjobsOutfiles[job.id] pandaOutfiles = {} for type in outfiles.keys(): pandaOutfiles[type] = outfiles[type] + "." + str(jidtag) if type == "LOG": pandaOutfiles[type] += ".tgz" #print pandaOutfiles for outtype in pandaOutfiles.keys(): fout = FileSpec() dset = string.replace(app.outputpaths[outtype], "/", ".") dset = dset[1:-1] fout.dataset = dset fout.lfn = pandaOutfiles[outtype] fout.type = 'output' # fout.destinationDBlock = jspec.destinationDBlock fout.destinationDBlock = fout.dataset fout.destinationSE = jspec.destinationSE if outtype == 'LOG': fout.type = 'log' fout.destinationDBlock = fout.dataset fout.destinationSE = job.backend.site jspec.addFile(fout) # job parameters param = '-l %s ' % self.library # user tarball. # use corruption checker if job.backend.requirements.corCheck: param += '--corCheck ' # disable to skip missing files if job.backend.requirements.notSkipMissing: param += '--notSkipMissing ' # transform parameters # need to update arglist with final output file name... newArgs = [] if app.mode == "evgen": app.args[3] = app.args[3] + " -t " if app.verbosity: app.args[3] = app.args[3] + " -l %s " % app.verbosity for arg in app.args[3:]: for type in outfiles.keys(): if arg.find(outfiles[type]) > -1: arg = arg.replace(outfiles[type], pandaOutfiles[type]) newArgs.append(arg) arglist = string.join(newArgs, " ") # print "Arglist:",arglist param += ' -r ./ ' param += ' -j "%s"' % urllib.quote(arglist) allinfiles = app.inputfiles + app.dbfiles # Input files. param += ' -i "%s" ' % allinfiles if len(app.mbfiles) > 0: param += ' -m "%s" ' % app.mbfiles if len(app.cavernfiles) > 0: param += ' -n "%s" ' % app.cavernfiles # param += '-m "[]" ' #%minList FIXME # param += '-n "[]" ' #%cavList FIXME del pandaOutfiles[ "LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore... param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items()) # source URL matchURL = re.search("(http.*://[^/]+)/", Client.baseURLSSL) if matchURL != None: param += " --sourceURL %s " % matchURL.group(1) param += " --trf" jspec.jobParameters = param jspec.metadata = "--trf \"%s\"" % arglist #print "SUBJOB DETAILS:",jspec.values() if app.dryrun: print "job.application.dryrun activated, printing out job parameters" print jspec.values() return return jspec