def createWorkflow(self, runNumber, primaryDataset,
                   processedDataset, dataTier):
    """
    _createWorkflow_

    Create a workflow for a given run and primary dataset. If the
    workflow has been created previously, load it and use it.

    """
    jobCache = os.path.join(self.args["ComponentDir"], "T0ASTPlugin",
                            "Run%s" % runNumber)
    if not os.path.exists(jobCache):
        os.makedirs(jobCache)

    workflowSpecFileName = "DQMHarvest-Run%s-%s-workflow.xml" % (runNumber,
                                                                 primaryDataset)
    workflowSpecPath = os.path.join(jobCache, workflowSpecFileName)

    if os.path.exists(workflowSpecPath):
        msg = "Loading existing workflow for dataset: %s\n " % primaryDataset
        msg += " => %s\n" % workflowSpecPath
        logging.info(msg)

        workflowSpec = WorkflowSpec()
        workflowSpec.load(workflowSpecPath)
        return (workflowSpec, workflowSpecPath)

    msg = "No workflow found for dataset: %s\n " % primaryDataset
    msg += "Looking up software version and generating workflow..."
    logging.info(msg)

    recoConfig = self.t0astWrapper.listRecoConfig(runNumber, primaryDataset)

    if not recoConfig["DO_RECO"]:
        logging.info("RECO disabled for dataset %s" % primaryDataset)
        return (None, None)

    globalTag = self.args.get("OverrideGlobalTag", None)
    if globalTag == None:
        globalTag = recoConfig["GLOBAL_TAG"]

    cmsswVersion = self.args.get("OverrideCMSSW", None)
    if cmsswVersion == None:
        cmsswVersion = recoConfig["CMSSW_VERSION"]

    datasetPath = "/%s/%s/%s" % (primaryDataset, processedDataset, dataTier)
    workflowSpec = createHarvestingWorkflow(
        datasetPath, self.site,
        self.args["CmsPath"],
        self.args["ScramArch"],
        cmsswVersion, globalTag,
        configFile=self.args["ConfigFile"],
        DQMServer=self.args['DQMServer'],
        proxyLocation=self.args['proxyLocation'],
        DQMCopyToCERN=self.args['DQMCopyToCERN'],
        doStageOut=self.args['DoStageOut'])

    workflowSpec.save(workflowSpecPath)
    msg = "Created Harvesting Workflow:\n %s" % workflowSpecPath
    logging.info(msg)
    self.publishWorkflow(workflowSpecPath, workflowSpec.workflowName())
    return (workflowSpec, workflowSpecPath)
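
# Usage sketch only: 'plugin' stands for an already-configured instance of the
# T0AST harvesting plugin (self.args, self.site and self.t0astWrapper are set
# up elsewhere); the run number and dataset names below are invented examples.
# workflowSpec, specPath = plugin.createWorkflow(123456, "MinimumBias",
#                                                "CRUZET09-v1", "RECO")
# if workflowSpec is None:
#     print "RECO disabled for this dataset, no workflow created"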
def createJobSpec(jobSpecId, workflowSpecFile, filename, runNumber, eventCount,
                  firstEvent=None, saveString=False, loadString=True):
    # //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    # //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)
    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
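
# Hypothetical invocation of createJobSpec above (all paths, IDs and counts
# are made up): loadString=False means workflowSpecFile is read from disk
# rather than parsed from an in-memory XML string.
# createJobSpec(jobSpecId="ExampleWorkflow-Run100-Job1",
#               workflowSpecFile="/path/to/ExampleWorkflow-workflow.xml",
#               filename="/path/to/ExampleWorkflow-Run100-Job1-JobSpec.xml",
#               runNumber=100,
#               eventCount=1000,
#               firstEvent=1,
#               saveString=False,
#               loadString=False)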
def __call__(self, collectPayload):
    """
    _operator(collectPayload)_

    Given the dataset and run in the payload, callout to DBS
    to find the files to be harvested

    """
    msg = "DBSPlugin invoked for %s" % str(collectPayload)
    logging.info(msg)
    site = self.args.get("Site", "srm.cern.ch")

    baseCache = os.path.join(self.args['ComponentDir'], "DBSPlugin")
    if not os.path.exists(baseCache):
        os.makedirs(baseCache)

    datasetCache = os.path.join(baseCache,
                                collectPayload['PrimaryDataset'],
                                collectPayload['ProcessedDataset'],
                                collectPayload['DataTier'])
    if not os.path.exists(datasetCache):
        os.makedirs(datasetCache)

    workflowFile = os.path.join(
        datasetCache,
        "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
        )

    if not os.path.exists(workflowFile):
        msg = "No workflow found for dataset: %s\n " % (
            collectPayload.datasetPath(),)
        msg += "Looking up software version and generating workflow..."
        logging.info(msg)

        if self.args.get("OverrideGlobalTag", None) == None:
            globalTag = findGlobalTagForDataset(
                self.dbsUrl,
                collectPayload['PrimaryDataset'],
                collectPayload['ProcessedDataset'],
                collectPayload['DataTier'],
                collectPayload['RunNumber'])
        else:
            globalTag = self.args['OverrideGlobalTag']

        if self.args.get("OverrideCMSSW", None) != None:
            cmsswVersion = self.args['OverrideCMSSW']
            msg = "Using Override for CMSSW Version %s" % (
                self.args['OverrideCMSSW'],)
            logging.info(msg)
        else:
            cmsswVersion = findVersionForDataset(
                self.dbsUrl,
                collectPayload['PrimaryDataset'],
                collectPayload['ProcessedDataset'],
                collectPayload['DataTier'],
                collectPayload['RunNumber'])
            msg = "Found CMSSW Version for dataset/run\n"
            msg += " Dataset %s Run %s\n" % (collectPayload.datasetPath(),
                                             collectPayload['RunNumber'])
            msg += " CMSSW Version = %s\n " % cmsswVersion
            logging.info(msg)

        workflowSpec = createHarvestingWorkflow(
            collectPayload.datasetPath(),
            site,
            self.args['CmsPath'],
            self.args['ScramArch'],
            cmsswVersion,
            globalTag,
            configFile=self.args['ConfigFile'],
            DQMServer=self.args['DQMServer'],
            proxyLocation=self.args['proxyLocation'],
            DQMCopyToCERN=self.args['DQMCopyToCERN'],
            doStageOut=self.args['DoStageOut'])

        workflowSpec.save(workflowFile)
        msg = "Created Harvesting Workflow:\n %s" % workflowFile
        logging.info(msg)
        self.publishWorkflow(workflowFile, workflowSpec.workflowName())
    else:
        msg = "Loading existing workflow for dataset: %s\n " % (
            collectPayload.datasetPath(),)
        msg += " => %s\n" % workflowFile
        logging.info(msg)
        workflowSpec = WorkflowSpec()
        workflowSpec.load(workflowFile)

    job = {}
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s-%s" % (
        workflowSpec.workflowName(),
        collectPayload['RunNumber'],
        time.strftime("%H-%M-%S-%d-%m-%y")
        )

    jobSpec.setJobName(jobName)
    jobSpec.setJobType("Harvesting")
    jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']
    jobSpec.addWhitelistSite(site)
    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
    jobSpec.payload.cfgInterface.inputFiles.extend(
        listFilesInRun(DBSReader(self.dbsUrl),
                       collectPayload['PrimaryDataset'],
                       collectPayload['ProcessedDataset'],
                       collectPayload['DataTier'],
                       collectPayload['RunNumber'])
        )

    specCacheDir = os.path.join(
        datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
    if not os.path.exists(specCacheDir):
        os.makedirs(specCacheDir)
    jobSpecFile = os.path.join(specCacheDir, "%s-JobSpec.xml" % jobName)

    jobSpec.save(jobSpecFile)

    job["JobSpecId"] = jobName
    job["JobSpecFile"] = jobSpecFile
    job['JobType'] = "Harvesting"
    job["WorkflowSpecId"] = workflowSpec.workflowName()
    job["WorkflowPriority"] = 10
    job["Sites"] = [site]
    job["Run"] = collectPayload['RunNumber']
    job['WorkflowSpecFile'] = workflowFile

    msg = "Harvesting Job Created for\n"
    msg += " => Run: %s\n" % collectPayload['RunNumber']
    msg += " => Primary: %s\n" % collectPayload['PrimaryDataset']
    msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
    msg += " => Tier: %s\n" % collectPayload['DataTier']
    msg += " => Workflow: %s\n" % job['WorkflowSpecId']
    msg += " => Job: %s\n" % job['JobSpecId']
    msg += " => Site: %s\n" % job['Sites']
    logging.info(msg)
    return [job]
from JobQueue.JobQueueDB import JobQueueDB
import ProdAgent.WorkflowEntities.Aux as WEAux
import ProdAgent.WorkflowEntities.Workflow as WEWorkflow

workflow = sys.argv[1]

workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)

# //
# // Clean out job cache
#//
config = loadProdAgentConfiguration()
compCfg = config.getConfig("JobCreator")
creatorCache = os.path.expandvars(compCfg['ComponentDir'])
workflowCache = os.path.join(creatorCache, workflowSpec.workflowName())
if os.path.exists(workflowCache):
    os.system("/bin/rm -rf %s" % workflowCache)

Session.set_database(dbConfig)
Session.connect()
Session.start_transaction()

# //
# // clean out queue
#//
jobQ = JobQueueDB()
jobQ.removeWorkflow(workflowSpec.workflowName())
class RequestIterator:
    """
    _RequestIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.count = 0
        self.runIncrement = 1
        self.currentJob = None
        self.sitePref = None
        self.pileupDatasets = {}
        self.ownedJobSpecs = {}

        # //
        # // Initially hard coded, should be extracted from Component Config
        #//
        self.eventsPerJob = 10

        self.workflowSpec = WorkflowSpec()
        try:
            self.workflowSpec.load(workflowSpecFile)
        except:
            logging.error("ERROR Loading Workflow: %s " % (workflowSpecFile))
            return

        if self.workflowSpec.parameters.get("RunIncrement", None) != None:
            self.runIncrement = int(
                self.workflowSpec.parameters['RunIncrement']
                )

        self.generators = GeneratorMaker()
        self.workflowSpec.payload.operate(self.generators)

        # //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" % self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list

        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return


    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites


    def __call__(self):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.

        """
        newJobSpec = self.createJobSpec()
        self.count += self.runIncrement
        return newJobSpec


    def createJobSpec(self):
        """
        _createJobSpec_

        Load the WorkflowSpec object and generate a JobSpec from it

        """
        jobSpec = self.workflowSpec.createJobSpec()
        jobName = "%s-%s" % (
            self.workflowSpec.workflowName(),
            self.count,
            )

        self.currentJob = jobName

        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Processing")
        jobSpec.parameters['RunNumber'] = self.count

        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.operate(self.generateJobConfig)
        jobSpec.payload.operate(self.generateCmsGenConfig)

        specCacheDir = os.path.join(
            self.specCache, str(self.count // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir, "%s-JobSpec.xml" % jobName)
        self.ownedJobSpecs[jobName] = jobSpecFile

        # //
        # // Add site pref if set
        #//
        if self.sitePref != None:
            #
            # AF: Allow site pref to be a comma separated list of sites,
            #     each one added in the Whitelist:
            #
            #jobSpec.addWhitelistSite(self.sitePref)
            for siteWhite in self.sitePref.split(","):
                jobSpec.addWhitelistSite(siteWhite)

        jobSpec.save(jobSpecFile)

        return jobSpecFile


    def generateJobConfig(self, jobSpecNode):
        """
        _generateJobConfig_

        Operator to act on a JobSpecNode tree to convert the template
        config file into a JobSpecific Config File

        """
        if jobSpecNode.name not in self.generators.keys():
            return
        generator = self.generators[jobSpecNode.name]

        useOutputMaxEv = False
        if jobSpecNode.cfgInterface != None:
            outMaxEv = jobSpecNode.cfgInterface.maxEvents['output']
            if outMaxEv != None:
                useOutputMaxEv = True

        if useOutputMaxEv:
            jobCfg = generator(self.currentJob,
                               maxEventsWritten=self.eventsPerJob,
                               firstRun=self.count)
        else:
            jobCfg = generator(self.currentJob,
                               maxEvents=self.eventsPerJob,
                               firstRun=self.count)

        # //
        # // Is there pileup for this node?
        #//
        if self.pileupDatasets.has_key(jobSpecNode.name):
            puDataset = self.pileupDatasets[jobSpecNode.name]
            logging.debug("Node: %s has a pileup dataset: %s" % (
                jobSpecNode.name, puDataset.dataset,
                ))
            fileList = puDataset.getPileupFiles()
            jobCfg.pileupFiles = fileList

        jobSpecNode.cfgInterface = jobCfg
        return


    def generateCmsGenConfig(self, jobSpecNode):
        """
        _generateCmsGenConfig_

        Process CmsGen type nodes to insert maxEvents and run numbers
        for cmsGen jobs

        """
        if jobSpecNode.type != "CmsGen":
            return

        jobSpecNode.applicationControls['firstRun'] = self.count
        jobSpecNode.applicationControls['maxEvents'] = self.eventsPerJob
        jobSpecNode.applicationControls['randomSeed'] = randomSeed()
        jobSpecNode.applicationControls['fileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        jobSpecNode.applicationControls['logicalFileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        return


    def removeSpec(self, jobSpecId):
        """
        _removeSpec_

        Remove a Spec file when it has been successfully injected

        """
        if jobSpecId not in self.ownedJobSpecs.keys():
            return

        logging.info("Removing JobSpec For: %s" % jobSpecId)
        filename = self.ownedJobSpecs[jobSpecId]
        if os.path.exists(filename):
            os.remove(filename)
        del self.ownedJobSpecs[jobSpecId]
        return


    def save(self, directory):
        """
        _save_

        Persist this object's state into an XML file and save it
        in the directory provided

        """
        doc = IMProvDoc("RequestIterator")
        node = IMProvNode(self.workflowSpec.workflowName())
        doc.addNode(node)

        node.addNode(IMProvNode("Run", None, Value=str(self.count)))
        node.addNode(
            IMProvNode("EventsPerJob", None, Value=str(self.eventsPerJob))
            )
        node.addNode(IMProvNode("SitePref", None, Value=str(self.sitePref)))

        pu = IMProvNode("Pileup")
        node.addNode(pu)
        for key, value in self.pileupDatasets.items():
            puNode = value.save()
            puNode.attrs['PayloadNode'] = key
            pu.addNode(puNode)

        specs = IMProvNode("JobSpecs")
        node.addNode(specs)
        for key, val in self.ownedJobSpecs.items():
            specs.addNode(IMProvNode("JobSpec", val, ID=key))

        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )
        handle = open(fname, 'w')
        handle.write(doc.makeDOMDocument().toprettyxml())
        handle.close()
        return


    def load(self, directory):
        """
        _load_

        Load this instance given the workflow and directory containing
        the persistency file

        """
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )

        try:
            node = loadIMProvFile(fname)
        except Exception, ex:
            msg = "ERROR: Corrupted Persistency File:\n"
            msg += " => %s\n" % fname
            msg += "Cannot be read:\n => %s\n" % str(ex)
            logging.error(msg)
            return

        qbase = "/RequestIterator/%s" % self.workflowSpec.workflowName()
        runQ = IMProvQuery("%s/Run[attribute(\"Value\")]" % qbase)
        eventQ = IMProvQuery("%s/EventsPerJob[attribute(\"Value\")]" % qbase)
        siteQ = IMProvQuery("%s/SitePref[attribute(\"Value\")]" % qbase)

        runVal = int(runQ(node)[-1])
        eventVal = int(eventQ(node)[-1])
        siteVal = str(siteQ(node)[-1])
        if siteVal.lower() == "none":
            siteVal = None

        self.count = runVal
        self.eventsPerJob = eventVal
        self.sitePref = siteVal

        puQ = IMProvQuery("%s/Pileup/*" % qbase)
        puNodes = puQ(node)
        for puNode in puNodes:
            payloadNode = str(puNode.attrs.get("PayloadNode"))
            puDataset = PileupDataset("dummy", 1)
            puDataset.load(puNode)
            self.pileupDatasets[payloadNode] = puDataset

        specQ = IMProvQuery("%s/JobSpecs/*" % qbase)
        specNodes = specQ(node)
        for specNode in specNodes:
            specId = str(specNode.attrs['ID'])
            specFile = str(specNode.chardata).strip()
            self.ownedJobSpecs[specId] = specFile

        return
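
# Minimal usage sketch for RequestIterator above; the workflow spec path and
# working directory are hypothetical, and persisting via save()/load() is
# optional.
# iterator = RequestIterator("/path/to/ExampleWorkflow-workflow.xml",
#                            "/path/to/working/dir")
# iterator.loadPileupDatasets()          # no-op if the workflow has no pileup
# for i in range(3):
#     specFile = iterator()              # writes a JobSpec XML, returns its path
#     print "Created job spec:", specFile
# iterator.save("/path/to/working/dir")  # persist run counter and owned specs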
class DatasetIterator:
    """
    _DatasetIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.currentJob = None
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpecFile)
        self.currentJobDef = None
        self.count = 0
        self.onlyClosedBlocks = False
        if self.workflowSpec.parameters.has_key("OnlyClosedBlocks"):
            onlyClosed = str(
                self.workflowSpec.parameters["OnlyClosedBlocks"]).lower()
            if onlyClosed == "true":
                self.onlyClosedBlocks = True
        self.ownedJobSpecs = {}
        self.allowedBlocks = []
        self.allowedSites = []
        self.dbsUrl = getLocalDBSURL()
        self.splitType = \
            self.workflowSpec.parameters.get("SplitType", "file").lower()
        self.splitSize = int(self.workflowSpec.parameters.get("SplitSize", 1))
        self.generators = GeneratorMaker()
        self.generators(self.workflowSpec.payload)
        self.pileupDatasets = {}

        # //
        # // Does the workflow contain a block restriction??
        #//
        blockRestriction = \
            self.workflowSpec.parameters.get("OnlyBlocks", None)
        if blockRestriction != None:
            # //
            # // restriction on blocks present, populate allowedBlocks list
            #//
            msg = "Block restriction provided in Workflow Spec:\n"
            msg += "%s\n" % blockRestriction
            blockList = blockRestriction.split(",")
            for block in blockList:
                if len(block.strip()) > 0:
                    self.allowedBlocks.append(block.strip())

        # //
        # // Does the workflow contain a site restriction??
        #//
        siteRestriction = \
            self.workflowSpec.parameters.get("OnlySites", None)
        if siteRestriction != None:
            # //
            # // restriction on sites present, populate allowedSites list
            #//
            msg = "Site restriction provided in Workflow Spec:\n"
            msg += "%s\n" % siteRestriction
            siteList = siteRestriction.split(",")
            for site in siteList:
                if len(site.strip()) > 0:
                    self.allowedSites.append(site.strip())

        # //
        # // Is the DBSURL contact information provided??
        #//
        value = self.workflowSpec.parameters.get("DBSURL", None)
        if value != None:
            self.dbsUrl = value

        if self.dbsUrl == None:
            msg = "Error: No DBSURL available for dataset:\n"
            msg += "Can't get local DBSURL and one not provided with workflow"
            raise RuntimeError, msg

        # //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" % self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)


    def __call__(self, jobDef):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.
        The JobDef should be a JobDefinition with the input details
        including LFNs and event ranges etc.

        """
        newJobSpec = self.createJobSpec(jobDef)
        self.count += 1
        return newJobSpec


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list

        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return


    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites


    def inputDataset(self):
        """
        _inputDataset_

        Extract the input Dataset from this workflow

        """
        topNode = self.workflowSpec.payload
        try:
            inputDataset = topNode._InputDatasets[-1]
        except StandardError, ex:
            msg = "Error extracting input dataset from Workflow:\n"
            msg += str(ex)
            logging.error(msg)
            return None

        return inputDataset.name()
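
# Hedged usage sketch for DatasetIterator above; 'jobDefinitions' is only a
# placeholder for the JobDefinition objects (LFNs, event ranges, etc.) that
# the dataset-splitting machinery normally supplies, and the paths are made up.
# dsIterator = DatasetIterator("/path/to/ExampleWorkflow-workflow.xml",
#                              "/path/to/working/dir")
# dsIterator.loadPileupDatasets()
# print "Input dataset:", dsIterator.inputDataset()
# for jobDef in jobDefinitions:          # placeholder iterable of JobDefinitions
#     specFile = dsIterator(jobDef)
#     print "Created job spec:", specFile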
    print usage
    sys.exit(1)

if nevts == None:
    print "--nevts option not provided \n The default of the workflow will be used."

## check workflow existing on disk
workflow = os.path.expandvars(os.path.expanduser(workflow))
if not os.path.exists(workflow):
    print "Workflow not found: %s" % workflow
    sys.exit(1)

## get the workflow name
workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)
workflowName = workflowSpec.workflowName()
workflowBase = os.path.basename(workflow)

## use MessageService
ms = MessageService()
## register message service instance as "TestSkim"
ms.registerAs("TestSkim")

## Debug level
ms.publish("DatasetInjector:StartDebug", "none")
ms.publish("JobCreator:StartDebug", "none")
ms.publish("JobSubmitter:StartDebug", "none")
ms.publish("DBSInterface:StartDebug", "none")
ms.publish("ErrorHandler:StartDebug", "none")
ms.publish("TrackingComponent:StartDebug", "none")
ms.commit()
def __call__(self, collectPayload):
    """
    _operator(collectPayload)_

    Given the dataset in the payload, callout to DBS
    to find the files to be harvested

    """
    msg = "RelValPlugin invoked for %s" % str(collectPayload)
    logging.info(msg)

    if collectPayload.get('Scenario', None) is None:
        msg = "RelValPlugin: Payload should provide a scenario."
        raise RuntimeError, msg

    site = self.args.get("Site", "srm-cms.cern.ch")

    baseCache = os.path.join(self.args['ComponentDir'], "RelValPlugin")
    if not os.path.exists(baseCache):
        os.makedirs(baseCache)

    datasetCache = os.path.join(baseCache,
                                collectPayload['PrimaryDataset'],
                                collectPayload['ProcessedDataset'],
                                collectPayload['DataTier'])
    if not os.path.exists(datasetCache):
        os.makedirs(datasetCache)

    workflowFile = os.path.join(
        datasetCache,
        "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
        )

    if not os.path.exists(workflowFile):
        msg = "No workflow found for dataset: %s\n " % (
            collectPayload.datasetPath())
        msg += "Looking up software version and generating workflow..."
        logging.info(msg)

        # Override Global Tag?
        if self.args.get("OverrideGlobalTag", None) is not None:
            globalTag = self.args['OverrideGlobalTag']
            msg = "Using Override for Global Tag: %s" % globalTag
            logging.info(msg)
        # Global Tag provided in the payload?
        elif collectPayload.get('GlobalTag', None) is not None:
            globalTag = collectPayload['GlobalTag']
            msg = "Global tag found in payload: %s" % globalTag
            logging.info(msg)
        # Look up in DBS for Global Tag, use fallback GT as last resort
        else:
            globalTag = findGlobalTagForDataset(
                self.dbsUrl,
                collectPayload['PrimaryDataset'],
                collectPayload['ProcessedDataset'],
                collectPayload['DataTier'])

        # Override CMSSW Version?
        if self.args.get("OverrideCMSSW", None) is not None:
            cmsswVersion = self.args['OverrideCMSSW']
            msg = "Using Override for CMSSW Version %s" % (
                self.args['OverrideCMSSW'],)
            logging.info(msg)
        # CMSSW Version provided in the payload?
        elif collectPayload.get('CMSSWVersion', None) is not None:
            cmsswVersion = collectPayload['CMSSWVersion']
            msg = "CMSSW Version found in payload: %s" % cmsswVersion
            logging.info(msg)
        else:
            cmsswVersion = findVersionForDataset(
                self.dbsUrl,
                collectPayload['PrimaryDataset'],
                collectPayload['ProcessedDataset'],
                collectPayload['DataTier'],
                collectPayload['RunNumber'])
            msg = "CMSSW Version for dataset/run\n"
            msg += " Dataset %s\n" % collectPayload.datasetPath()
            msg += " CMSSW Version = %s\n " % cmsswVersion
            logging.info(msg)

        workflowSpec = createHarvestingWorkflow(
            collectPayload.datasetPath(),
            site,
            self.args['CmsPath'],
            self.args['ScramArch'],
            cmsswVersion,
            globalTag,
            configFile=self.args['ConfigFile'],
            DQMServer=self.args['DQMServer'],
            proxyLocation=self.args['proxyLocation'],
            DQMCopyToCERN=self.args['DQMCopyToCERN'],
            doStageOut=self.args['DoStageOut'])

        workflowSpec.save(workflowFile)
        msg = "Created Harvesting Workflow:\n %s" % workflowFile
        msg += "\nThe following parameters were used:\n"
        msg += "DQMserver ==> %s\n" % (self.args['DQMServer'])
        msg += "proxyLocation ==> %s\n" % (self.args['proxyLocation'])
        msg += "Stage Out ==> %s\n" % (self.args['DoStageOut'])
        msg += "DQMCopyToCERN ==> %s\n" % (self.args['DQMCopyToCERN'])
        logging.info(msg)
        self.publishWorkflow(workflowFile, workflowSpec.workflowName())
    else:
        msg = "Loading existing workflow for dataset: %s\n " % (
            collectPayload.datasetPath())
        msg += " => %s\n" % workflowFile
        logging.info(msg)
        workflowSpec = WorkflowSpec()
        workflowSpec.load(workflowFile)

    job = {}
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s-%s" % (
        workflowSpec.workflowName(),
        collectPayload['RunNumber'],
        time.strftime("%H-%M-%S-%d-%m-%y")
        )

    jobSpec.setJobName(jobName)
    jobSpec.setJobType("Harvesting")

    # Adding specific parameters to the JobSpec
    jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']  # How should we manage the run numbers?
    jobSpec.parameters['Scenario'] = collectPayload['Scenario']
    if collectPayload.get('RefHistKey', None) is not None:
        jobSpec.parameters['RefHistKey'] = collectPayload['RefHistKey']

    jobSpec.addWhitelistSite(site)
    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
    jobSpec.payload.cfgInterface.inputFiles.extend(
        getLFNForDataset(self.dbsUrl,
                         collectPayload['PrimaryDataset'],
                         collectPayload['ProcessedDataset'],
                         collectPayload['DataTier'],
                         run=collectPayload['RunNumber']))

    specCacheDir = os.path.join(
        datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
    if not os.path.exists(specCacheDir):
        os.makedirs(specCacheDir)
    jobSpecFile = os.path.join(specCacheDir, "%s-JobSpec.xml" % jobName)

    jobSpec.save(jobSpecFile)

    job["JobSpecId"] = jobName
    job["JobSpecFile"] = jobSpecFile
    job['JobType'] = "Harvesting"
    job["WorkflowSpecId"] = workflowSpec.workflowName()
    job["WorkflowPriority"] = 10
    job["Sites"] = [site]
    job["Run"] = collectPayload['RunNumber']
    job['WorkflowSpecFile'] = workflowFile

    msg = "Harvesting Job Created for\n"
    msg += " => Run: %s\n" % collectPayload['RunNumber']
    msg += " => Primary: %s\n" % collectPayload['PrimaryDataset']
    msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
    msg += " => Tier: %s\n" % collectPayload['DataTier']
    msg += " => Workflow: %s\n" % job['WorkflowSpecId']
    msg += " => Job: %s\n" % job['JobSpecId']
    msg += " => Site: %s\n" % job['Sites']
    logging.info(msg)
    return [job]
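
# Illustrative payload contents for the RelVal plugin above (all values
# invented). 'Scenario' is mandatory; 'GlobalTag', 'CMSSWVersion' and
# 'RefHistKey' are optional and short-circuit the DBS lookups when present.
# As before, the real payload object also provides a datasetPath() helper,
# so a plain dict is only a stand-in for illustration.
# examplePayload = {
#     'PrimaryDataset': 'RelValMinBias',                      # hypothetical
#     'ProcessedDataset': 'CMSSW_3_1_0-STARTUP_31X-v1',       # hypothetical
#     'DataTier': 'GEN-SIM-RECO',
#     'RunNumber': 1,
#     'Scenario': 'relvalmc',                                 # hypothetical
#     'GlobalTag': 'STARTUP_31X::All',                        # optional, hypothetical
# }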
for run in range(firstrun, lastrun + 1):

    jobCreator.setRun(run)

    # if this is needed we should create
    # a JobCreator instance per run
    #workflowSpec.setWorkflowRunNumber(run)

    jobList = []

    for lumi in range(1, lumiperrun + 1):

        jobCreator.setLumi(lumi)
        jobCreator.setEventsPerJob(eventsperjob)
        jobCreator.setFirstEvent(1 + lumi * eventsperjob)

        jobName = "%s-%s-%s" % (workflowSpec.workflowName(), run, lumi)

        jobSpec = workflowSpec.createJobSpec()

        jobSpecDir = os.path.join("/data/hufnagel/parepack/StreamerMCRunning",
                                  str(run // 1000).zfill(4))
        if not os.path.exists(jobSpecDir):
            os.makedirs(jobSpecDir)
        jobSpecFileName = jobName + "-jobspec.xml"
        jobSpecFile = os.path.join(jobSpecDir, jobSpecFileName)

        jobSpec.setJobName(jobName)

        # used for thresholds