def add(self, workflowFile):
    """
    _add_

    Add a dataset to the list of watched datasets.

    Arguments:

      workflowFile -- the workflow specification file

    Return:

      the datasetId

    """
    # read the WorkflowSpecFile
    try:
        wfile = WorkflowSpec()
        wfile.load(workflowFile)

    # wrong dataset file
    except Exception, msg:
        raise InvalidDataset, \
              "Error loading workflow specifications from %s" % workflowFile
class JobSpecExpander: def __init__(self, jobSpecFile): self.jobSpec = JobSpec() self.jobSpec.load(jobSpecFile) self.taskState = TaskState(os.getcwd()) self.taskState.loadRunResDB() self.workflowSpec = WorkflowSpec() self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"]) self.config = self.taskState.configurationDict() finder = NodeFinder(self.taskState.taskName()) self.jobSpec.payload.operate(finder) self.jobSpecNode = finder.result wffinder = NodeFinder(self.taskState.taskName()) self.workflowSpec.payload.operate(wffinder) self.workflowNode = wffinder.result if self.jobSpecNode.jobType != "Merge": if self.config.has_key('Configuration'): try: self.createPSet() except Exception, ex: msg = "Unable to generate cmsRun Config from JobSpec:\n" msg += str(ex) print msg badfile = open("exit.status", 'w') badfile.write("10040") badfile.close() else:
def getCMSSoft(work, reverse=False):
    """
    Opens the workflow file and gets the CMSSoft version.
    If reverse, returns a map between CMSSoft version and real workflow name.
    """
    new_work = {}
    workflowSpec = WorkflowSpec()
    for fil in work:
        try:
            workflowSpec.load(fil)
            cmssw = workflowSpec.payload.application['Version']
            name = workflowSpec.parameters['WorkflowName']
            if reverse:
                if not new_work.has_key(cmssw):
                    new_work[cmssw] = []
                new_work[cmssw].append(name)
            else:
                new_work[name] = cmssw
        except:
            # something went wrong while handling this workflow file
            msg = "WorkflowConstraints getCMSSoft: something went wrong " \
                  "while handling file " + fil
            print(msg)
    return new_work
def createWorkflow(self, runNumber, primaryDataset,
                   processedDataset, dataTier):
    """
    _createWorkflow_

    Create a workflow for a given run and primary dataset.  If the workflow
    has been created previously, load it and use it.
    """
    jobCache = os.path.join(self.args["ComponentDir"], "T0ASTPlugin",
                            "Run" + runNumber)
    if not os.path.exists(jobCache):
        os.makedirs(jobCache)

    workflowSpecFileName = "DQMHarvest-Run%s-%s-workflow.xml" % (runNumber,
                                                                 primaryDataset)
    workflowSpecPath = os.path.join(jobCache, workflowSpecFileName)

    if os.path.exists(workflowSpecPath):
        msg = "Loading existing workflow for dataset: %s\n " % primaryDataset
        msg += " => %s\n" % workflowSpecPath
        logging.info(msg)

        workflowSpec = WorkflowSpec()
        workflowSpec.load(workflowSpecPath)
        return (workflowSpec, workflowSpecPath)

    msg = "No workflow found for dataset: %s\n " % primaryDataset
    msg += "Looking up software version and generating workflow..."

    recoConfig = self.t0astWrapper.listRecoConfig(runNumber, primaryDataset)

    if not recoConfig["DO_RECO"]:
        logging.info("RECO disabled for dataset %s" % primaryDataset)
        return (None, None)

    globalTag = self.args.get("OverrideGlobalTag", None)
    if globalTag == None:
        globalTag = recoConfig["GLOBAL_TAG"]

    cmsswVersion = self.args.get("OverrideCMSSW", None)
    if cmsswVersion == None:
        cmsswVersion = recoConfig["CMSSW_VERSION"]

    datasetPath = "/%s/%s/%s" % (primaryDataset, processedDataset, dataTier)
    workflowSpec = createHarvestingWorkflow(
        datasetPath, self.site,
        self.args["CmsPath"],
        self.args["ScramArch"],
        cmsswVersion, globalTag,
        configFile=self.args["ConfigFile"],
        DQMServer=self.args['DQMServer'],
        proxyLocation=self.args['proxyLocation'],
        DQMCopyToCERN=self.args['DQMCopyToCERN'],
        doStageOut=self.args['DoStageOut'])

    workflowSpec.save(workflowSpecPath)
    msg = "Created Harvesting Workflow:\n %s" % workflowSpecPath
    logging.info(msg)
    self.publishWorkflow(workflowSpecPath, workflowSpec.workflowName())
    return (workflowSpec, workflowSpecPath)
def makeJobs(self, testInstance):
    """
    _makeJobs_

    Create Job Specs for the test instance provided
    """
    logging.info("Creating Jobs for test %s at site %s" % (
        testInstance['Name'], testInstance['Site']))

    testName = testInstance['WorkflowSpecId']
    specInstance = WorkflowSpec()
    specInstance.load(testInstance['WorkflowSpecFile'])

    if testInstance['InputDataset'] == None:
        initialRun = self.jobCounts.get(testInstance['Name'], 1)
        factory = RequestJobFactory(
            specInstance,
            testInstance['WorkingDir'],
            testInstance['TotalEvents'],
            InitialRun = initialRun,
            EventsPerJob = testInstance['EventsPerJob'],
            Sites = [testInstance['Site']])
        jobsList = factory()
        self.jobCounts[testInstance['Name']] += len(jobsList)
    else:
        factory = DatasetJobFactory(
            specInstance,
            testInstance['WorkingDir'],
            specInstance.parameters['DBSURL'],
            )
        jobsList = factory()
        self.jobCounts[testInstance['Name']] += len(jobsList)

    msg = "Created %s jobs:\n" % len(jobsList)
    for job in jobsList:
        jobSpecFile = job['JobSpecFile']
        jobSpecId = job['JobSpecId']
        msg += "  %s\n" % jobSpecId
        testInstance['JobSpecs'][jobSpecId] = jobSpecFile

    logging.info(msg)
    return
def GoodWorkflow(workflow):
    """
    Check if workflow can be loaded
    """
    RequestDir, firstrun = getRequestInjectorConfig()
    workflowCache = "%s/WorkflowCache" % RequestDir
    workflowSpec = WorkflowSpec()
    try:
        workflowSpec.load(workflow)
    except:
        return False
    return True
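# Illustrative only (not from the original sources): a minimal sketch of how a
# loadability check like GoodWorkflow() above might be used to filter a list of
# candidate spec files before injection.  The `candidateSpecs` list and the
# path in it are hypothetical stand-ins for whatever the caller collects from
# its WorkflowCache directory.
candidateSpecs = ["/tmp/example-Workflow.xml"]   # hypothetical paths
loadableSpecs = [wf for wf in candidateSpecs if GoodWorkflow(wf)]
for wf in loadableSpecs:
    print "Workflow spec %s loads cleanly" % wf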
class FactoryInterface:
    """
    _FactoryInterface_

    JobSpec Factory Interface definition & common utils for all
    job spec factory generators
    """
    def __init__(self, workflowSpec):
        # or use isinstance(WorkflowSpec) if need to include sub classes
        if workflowSpec.__class__ is WorkflowSpec:
            self.workflow = workflowSpec
        else:
            self.workflow = WorkflowSpec()
            self.workflow.load(workflowSpec)
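# Illustrative only: a minimal sketch of the two ways FactoryInterface (above)
# can be constructed -- with an already-loaded WorkflowSpec instance, or with a
# path to a spec file that it then loads itself.  The file path used here is a
# hypothetical example.
spec = WorkflowSpec()
spec.load("/tmp/example-Workflow.xml")        # hypothetical spec file
factoryFromObject = FactoryInterface(spec)    # pass the loaded spec directly
factoryFromFile = FactoryInterface("/tmp/example-Workflow.xml")  # or let it load the file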
def loadWorkflow(self, specFile):
    """
    _loadWorkflow_

    Helper method, since every plugin will have to do something
    with a workflow
    """
    spec = WorkflowSpec()
    try:
        spec.load(specFile)
    except Exception, ex:
        msg = "Unable to read workflow spec file:\n%s\n" % specFile
        msg += str(ex)
        raise RuntimeError, msg
def createJobSpec(jobSpecId, workflowSpecFile, filename, runNumber, eventCount,
                  firstEvent=None, saveString=False, loadString=True):
    # //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    # //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)
    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
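# Illustrative only: a minimal sketch of calling createJobSpec() above with a
# workflow spec file on disk (loadString=False) to write out a single
# processing JobSpec.  All literal paths and values here are hypothetical
# placeholders, not taken from the original sources.
createJobSpec("ExampleJob-1",                    # jobSpecId (hypothetical)
              "/tmp/example-Workflow.xml",       # workflow spec file (hypothetical)
              "/tmp/ExampleJob-1-JobSpec.xml",   # output JobSpec file
              runNumber=1, eventCount=100,
              firstEvent=1,
              saveString=False, loadString=False)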
def __call__(self, collectPayload): """ _operator(collectPayload)_ Given the dataset in the payload, callout to DBS to find the files to be harvested """ msg = "RelValPlugin invoked for %s" % str(collectPayload) logging.info(msg) if collectPayload.get('Scenario', None) is None: msg = "RelValPlugin: Payload should provide a scenario." raise RuntimeError, msg site = self.args.get("Site", "srm-cms.cern.ch") baseCache = os.path.join(self.args['ComponentDir'], "RelValPlugin") if not os.path.exists(baseCache): os.makedirs(baseCache) datasetCache = os.path.join(baseCache, collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier']) if not os.path.exists(datasetCache): os.makedirs(datasetCache) workflowFile = os.path.join( datasetCache, "%s-%s-%s-DQMHarvest-Workflow.xml" % ( collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier']) ) if not os.path.exists(workflowFile): msg = "No workflow found for dataset: %s\n " % ( collectPayload.datasetPath()) msg += "Looking up software version and generating workflow..." logging.info(msg) # Override Global Tag? if self.args.get("OverrideGlobalTag", None) is not None: globalTag = self.args['OverrideGlobalTag'] msg = "Using Overrride for Global: %s" % globalTag logging.info(msg) # Global Tag provided in the payload? elif collectPayload.get('GlobalTag', None) is not None: globalTag = collectPayload['GlobalTag'] msg = "Global tag found in payload: %s" % globalTag logging.info(msg) # Look up in DBS for Global Tag, use fallback GT as last resort else: globalTag = findGlobalTagForDataset( self.dbsUrl, collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier']) # Override CMSSW Version if self.args.get("OverrideCMSSW", None) is not None: cmsswVersion = self.args['OverrideCMSSW'] msg = "Using Override for CMSSW Version %s" % ( self.args['OverrideCMSSW'],) logging.info(msg) # CMSSW Version provided in the payload? 
elif collectPayload.get('CMSSWVersion', None) is not None: cmsswVersion = collectPayload['CMSSWVersion'] msg = "CMSSW Version found in payload: %s" % cmsswVersion logging.info(msg) else: cmsswVersion = findVersionForDataset( self.dbsUrl, collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier'], collectPayload['RunNumber']) msg = "CMSSW Version for dataset/run\n" msg += " Dataset %s\n" % collectPayload.datasetPath() msg += " CMSSW Version = %s\n " % cmsswVersion logging.info(msg) workflowSpec = createHarvestingWorkflow( collectPayload.datasetPath(), site, self.args['CmsPath'], self.args['ScramArch'], cmsswVersion, globalTag, configFile=self.args['ConfigFile'], DQMServer=self.args['DQMServer'], proxyLocation=self.args['proxyLocation'], DQMCopyToCERN=self.args['DQMCopyToCERN'], doStageOut=self.args['DoStageOut']) workflowSpec.save(workflowFile) msg = "Created Harvesting Workflow:\n %s" % workflowFile msg += "\nThe following parameters were used:\n" msg += "DQMserver ==> %s\n" % (self.args['DQMServer']) msg += "proxyLocation ==> %s\n" % (self.args['proxyLocation']) msg += "Stage Out ==> %s\n" % (self.args['DoStageOut']) msg += "DQMCopyToCERN ==> %s\n" % (self.args['DQMCopyToCERN']) logging.info(msg) self.publishWorkflow(workflowFile, workflowSpec.workflowName()) else: msg = "Loading existing workflow for dataset: %s\n " % ( collectPayload.datasetPath()) msg += " => %s\n" % workflowFile logging.info(msg) workflowSpec = WorkflowSpec() workflowSpec.load(workflowFile) job = {} jobSpec = workflowSpec.createJobSpec() jobName = "%s-%s-%s" % ( workflowSpec.workflowName(), collectPayload['RunNumber'], time.strftime("%H-%M-%S-%d-%m-%y") ) jobSpec.setJobName(jobName) jobSpec.setJobType("Harvesting") # Adding specific parameters to the JobSpec jobSpec.parameters['RunNumber'] = collectPayload['RunNumber'] # How should we manage the run numbers? jobSpec.parameters['Scenario'] = collectPayload['Scenario'] if collectPayload.get('RefHistKey', None) is not None: jobSpec.parameters['RefHistKey'] = collectPayload['RefHistKey'] jobSpec.addWhitelistSite(site) jobSpec.payload.operate(DefaultLFNMaker(jobSpec)) jobSpec.payload.cfgInterface.inputFiles.extend( getLFNForDataset(self.dbsUrl, collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier'], run=collectPayload['RunNumber'])) specCacheDir = os.path.join( datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4)) if not os.path.exists(specCacheDir): os.makedirs(specCacheDir) jobSpecFile = os.path.join(specCacheDir, "%s-JobSpec.xml" % jobName) jobSpec.save(jobSpecFile) job["JobSpecId"] = jobName job["JobSpecFile"] = jobSpecFile job['JobType'] = "Harvesting" job["WorkflowSpecId"] = workflowSpec.workflowName(), job["WorkflowPriority"] = 10 job["Sites"] = [site] job["Run"] = collectPayload['RunNumber'] job['WorkflowSpecFile'] = workflowFile msg = "Harvesting Job Created for\n" msg += " => Run: %s\n" % collectPayload['RunNumber'] msg += " => Primary: %s\n" % collectPayload['PrimaryDataset'] msg += " => Processed: %s\n" % collectPayload['ProcessedDataset'] msg += " => Tier: %s\n" % collectPayload['DataTier'] msg += " => Workflow: %s\n" % job['WorkflowSpecId'] msg += " => Job: %s\n" % job['JobSpecId'] msg += " => Site: %s\n" % job['Sites'] logging.info(msg) return [job]
import StageOut.Utilities as StageOutUtils
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

if __name__ == '__main__':
    msg = "******RuntimeStageOutFailure Invoked*****"

    state = TaskState(os.getcwd())
    state.loadRunResDB()
    config = state.configurationDict()

    # //
    # // find inputs by locating the task for which we are staging out
    #//  and loading its TaskState
    workflow = WorkflowSpec()
    workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])

    stageOutFor, override, controls = StageOutUtils.getStageOutConfig(
        workflow, state.taskName())

    inputTasks = stageOutFor
    for inputTask in inputTasks:
        inputState = getTaskState(inputTask)
        if inputState == None:
            msg = "Input State: %s Not found, skipping..." % inputTask
            continue
        inputReport = inputState.getJobReport()
        inputReport.status = "Failed"
        if inputReport.exitCode in (0, "0"):
class RequestIterator: """ _RequestIterator_ Working from a Generic Workflow template, generate concrete jobs from it, keeping in-memory history """ def __init__(self, workflowSpecFile, workingDir): self.workflow = workflowSpecFile self.workingDir = workingDir self.count = 0 self.runIncrement = 1 self.currentJob = None self.sitePref = None self.pileupDatasets = {} self.ownedJobSpecs = {} # // # // Initially hard coded, should be extracted from Component Config #// self.eventsPerJob = 10 self.workflowSpec = WorkflowSpec() try: self.workflowSpec.load(workflowSpecFile) except: logging.error("ERROR Loading Workflow: %s " % (workflowSpecFile)) return if self.workflowSpec.parameters.get("RunIncrement", None) != None: self.runIncrement = int( self.workflowSpec.parameters['RunIncrement'] ) self.generators = GeneratorMaker() self.workflowSpec.payload.operate(self.generators) # // # // Cache Area for JobSpecs #// self.specCache = os.path.join( self.workingDir, "%s-Cache" %self.workflowSpec.workflowName()) if not os.path.exists(self.specCache): os.makedirs(self.specCache) def loadPileupDatasets(self): """ _loadPileupDatasets_ Are we dealing with pileup? If so pull in the file list """ puDatasets = self.workflowSpec.pileupDatasets() if len(puDatasets) > 0: logging.info("Found %s Pileup Datasets for Workflow: %s" % ( len(puDatasets), self.workflowSpec.workflowName(), )) self.pileupDatasets = createPileupDatasets(self.workflowSpec) return def loadPileupSites(self): """ _loadPileupSites_ Are we dealing with pileup? If so pull in the site list """ sites = [] puDatasets = self.workflowSpec.pileupDatasets() if len(puDatasets) > 0: logging.info("Found %s Pileup Datasets for Workflow: %s" % ( len(puDatasets), self.workflowSpec.workflowName(), )) sites = getPileupSites(self.workflowSpec) return sites def __call__(self): """ _operator()_ When called generate a new concrete job payload from the generic workflow and return it. 
""" newJobSpec = self.createJobSpec() self.count += self.runIncrement return newJobSpec def createJobSpec(self): """ _createJobSpec_ Load the WorkflowSpec object and generate a JobSpec from it """ jobSpec = self.workflowSpec.createJobSpec() jobName = "%s-%s" % ( self.workflowSpec.workflowName(), self.count, ) self.currentJob = jobName jobSpec.setJobName(jobName) jobSpec.setJobType("Processing") jobSpec.parameters['RunNumber'] = self.count jobSpec.payload.operate(DefaultLFNMaker(jobSpec)) jobSpec.payload.operate(self.generateJobConfig) jobSpec.payload.operate(self.generateCmsGenConfig) specCacheDir = os.path.join( self.specCache, str(self.count // 1000).zfill(4)) if not os.path.exists(specCacheDir): os.makedirs(specCacheDir) jobSpecFile = os.path.join(specCacheDir, "%s-JobSpec.xml" % jobName) self.ownedJobSpecs[jobName] = jobSpecFile # // # // Add site pref if set #// if self.sitePref != None: # #AF: Allow site pref to be a comma separated list of sites, each one # added in the Whitelist: # jobSpec.addWhitelistSite(self.sitePref) for siteWhite in self.sitePref.split(","): jobSpec.addWhitelistSite(siteWhite) jobSpec.save(jobSpecFile) return jobSpecFile def generateJobConfig(self, jobSpecNode): """ _generateJobConfig_ Operator to act on a JobSpecNode tree to convert the template config file into a JobSpecific Config File """ if jobSpecNode.name not in self.generators.keys(): return generator = self.generators[jobSpecNode.name] useOutputMaxEv = False if jobSpecNode.cfgInterface != None: outMaxEv = jobSpecNode.cfgInterface.maxEvents['output'] if outMaxEv != None: useOutputMaxEv = True if useOutputMaxEv: jobCfg = generator(self.currentJob, maxEventsWritten = self.eventsPerJob, firstRun = self.count) else: jobCfg = generator(self.currentJob, maxEvents = self.eventsPerJob, firstRun = self.count) # // # // Is there pileup for this node? 
#// if self.pileupDatasets.has_key(jobSpecNode.name): puDataset = self.pileupDatasets[jobSpecNode.name] logging.debug("Node: %s has a pileup dataset: %s" % ( jobSpecNode.name, puDataset.dataset, )) fileList = puDataset.getPileupFiles() jobCfg.pileupFiles = fileList jobSpecNode.cfgInterface = jobCfg return def generateCmsGenConfig(self, jobSpecNode): """ _generateCmsGenConfig_ Process CmsGen type nodes to insert maxEvents and run numbers for cmsGen jobs """ if jobSpecNode.type != "CmsGen": return jobSpecNode.applicationControls['firstRun'] = self.count jobSpecNode.applicationControls['maxEvents'] = self.eventsPerJob jobSpecNode.applicationControls['randomSeed'] = randomSeed() jobSpecNode.applicationControls['fileName'] = "%s-%s.root" % ( self.currentJob, jobSpecNode.name) jobSpecNode.applicationControls['logicalFileName'] = "%s-%s.root" % ( self.currentJob, jobSpecNode.name) return def removeSpec(self, jobSpecId): """ _removeSpec_ Remove a Spec file when it has been successfully injected """ if jobSpecId not in self.ownedJobSpecs.keys(): return logging.info("Removing JobSpec For: %s" % jobSpecId) filename = self.ownedJobSpecs[jobSpecId] if os.path.exists(filename): os.remove(filename) del self.ownedJobSpecs[jobSpecId] return def save(self, directory): """ _save_ Persist this objects state into an XML file and save it in the directory provided """ doc = IMProvDoc("RequestIterator") node = IMProvNode(self.workflowSpec.workflowName()) doc.addNode(node) node.addNode(IMProvNode("Run", None, Value = str(self.count))) node.addNode( IMProvNode("EventsPerJob", None, Value = str(self.eventsPerJob)) ) node.addNode(IMProvNode("SitePref", None, Value = str(self.sitePref))) pu = IMProvNode("Pileup") node.addNode(pu) for key, value in self.pileupDatasets.items(): puNode = value.save() puNode.attrs['PayloadNode'] = key pu.addNode(puNode) specs = IMProvNode("JobSpecs") node.addNode(specs) for key, val in self.ownedJobSpecs.items(): specs.addNode(IMProvNode("JobSpec", val, ID = key)) fname = os.path.join( directory, "%s-Persist.xml" % self.workflowSpec.workflowName() ) handle = open(fname, 'w') handle.write(doc.makeDOMDocument().toprettyxml()) handle.close() return def load(self, directory): """ _load_ Load this instance given the workflow and directory containing the persistency file """ fname = os.path.join( directory, "%s-Persist.xml" % self.workflowSpec.workflowName() ) try: node = loadIMProvFile(fname) except Exception, ex: msg = "ERROR: Corrupted Persistency File:\n" msg += " => %s\n" % fname msg += "Cannot be read:\n => %s\n" % str(ex) logging.error(msg) return qbase = "/RequestIterator/%s" % self.workflowSpec.workflowName() runQ = IMProvQuery("%s/Run[attribute(\"Value\")]" % qbase) eventQ = IMProvQuery("%s/EventsPerJob[attribute(\"Value\")]" % qbase) siteQ = IMProvQuery("%s/SitePref[attribute(\"Value\")]" % qbase) runVal = int(runQ(node)[-1]) eventVal = int(eventQ(node)[-1]) siteVal = str(siteQ(node)[-1]) if siteVal.lower() == "none": siteVal = None self.count = runVal self.eventsPerJob = eventVal self.sitePref = siteVal puQ = IMProvQuery("%s/Pileup/*" % qbase) puNodes = puQ(node) for puNode in puNodes: payloadNode = str(puNode.attrs.get("PayloadNode")) puDataset = PileupDataset("dummy", 1) puDataset.load(puNode) self.pileupDatasets[payloadNode] = puDataset specQ = IMProvQuery("%s/JobSpecs/*" % qbase) specNodes = specQ(node) for specNode in specNodes: specId = str(specNode.attrs['ID']) specFile = str(specNode.chardata).strip() self.ownedJobSpecs[specId] = specFile return
logHandler.setFormatter(logFormatter)
logging.getLogger().addHandler(logHandler)
logging.getLogger().setLevel(logging.INFO)

Dataset.setLogging(logging)
database = MergeSensorDB()
Dataset.setDatabase(database)

workflowFile = sys.argv[1]
print "Updating DB for workflow: ", workflowFile

# read the WorkflowSpecFile
try:
    wfile = WorkflowSpec()
    wfile.load(workflowFile)

# wrong dataset file
except Exception, msg:
    print "Error loading workflow specifications from %s: %s" \
          % (workflowFile, msg)
    sys.exit(1)

# get output modules
try:
    outputDatasetsList = wfile.outputDatasets()
    outputModules = [outDS['OutputModuleName'] \
                     for outDS in outputDatasetsList]

    # remove duplicates
class DatasetIterator: """ _DatasetIterator_ Working from a Generic Workflow template, generate concrete jobs from it, keeping in-memory history """ def __init__(self, workflowSpecFile, workingDir): self.workflow = workflowSpecFile self.workingDir = workingDir self.currentJob = None self.workflowSpec = WorkflowSpec() self.workflowSpec.load(workflowSpecFile) self.currentJobDef = None self.count = 0 self.onlyClosedBlocks = False if self.workflowSpec.parameters.has_key("OnlyClosedBlocks"): onlyClosed = str( self.workflowSpec.parameters["OnlyClosedBlocks"]).lower() if onlyClosed == "true": self.onlyClosedBlocks = True self.ownedJobSpecs = {} self.allowedBlocks = [] self.allowedSites = [] self.dbsUrl = getLocalDBSURL() self.splitType = \ self.workflowSpec.parameters.get("SplitType", "file").lower() self.splitSize = int(self.workflowSpec.parameters.get("SplitSize", 1)) self.generators = GeneratorMaker() self.generators(self.workflowSpec.payload) self.pileupDatasets = {} # // # // Does the workflow contain a block restriction?? #// blockRestriction = \ self.workflowSpec.parameters.get("OnlyBlocks", None) if blockRestriction != None: # // # // restriction on blocks present, populate allowedBlocks list #// msg = "Block restriction provided in Workflow Spec:\n" msg += "%s\n" % blockRestriction blockList = blockRestriction.split(",") for block in blockList: if len(block.strip() ) > 0: self.allowedBlocks.append(block.strip()) # // # // Does the workflow contain a site restriction?? #// siteRestriction = \ self.workflowSpec.parameters.get("OnlySites", None) if siteRestriction != None: # // # // restriction on sites present, populate allowedSites list #// msg = "Site restriction provided in Workflow Spec:\n" msg += "%s\n" % siteRestriction siteList = siteRestriction.split(",") for site in siteList: if len(site.strip() ) > 0: self.allowedSites.append(site.strip()) # // # // Is the DBSURL contact information provided?? #// value = self.workflowSpec.parameters.get("DBSURL", None) if value != None: self.dbsUrl = value if self.dbsUrl == None: msg = "Error: No DBSURL available for dataset:\n" msg += "Cant get local DBSURL and one not provided with workflow" raise RuntimeError, msg # // # // Cache Area for JobSpecs #// self.specCache = os.path.join( self.workingDir, "%s-Cache" %self.workflowSpec.workflowName()) if not os.path.exists(self.specCache): os.makedirs(self.specCache) def __call__(self, jobDef): """ _operator()_ When called generate a new concrete job payload from the generic workflow and return it. The JobDef should be a JobDefinition with the input details including LFNs and event ranges etc. """ newJobSpec = self.createJobSpec(jobDef) self.count += 1 return newJobSpec def loadPileupDatasets(self): """ _loadPileupDatasets_ Are we dealing with pileup? If so pull in the file list """ puDatasets = self.workflowSpec.pileupDatasets() if len(puDatasets) > 0: logging.info("Found %s Pileup Datasets for Workflow: %s" % ( len(puDatasets), self.workflowSpec.workflowName(), )) self.pileupDatasets = createPileupDatasets(self.workflowSpec) return def loadPileupSites(self): """ _loadPileupSites_ Are we dealing with pileup? 
If so pull in the site list """ sites = [] puDatasets = self.workflowSpec.pileupDatasets() if len(puDatasets) > 0: logging.info("Found %s Pileup Datasets for Workflow: %s" % ( len(puDatasets), self.workflowSpec.workflowName(), )) sites = getPileupSites(self.workflowSpec) return sites def inputDataset(self): """ _inputDataset_ Extract the input Dataset from this workflow """ topNode = self.workflowSpec.payload try: inputDataset = topNode._InputDatasets[-1] except StandardError, ex: msg = "Error extracting input dataset from Workflow:\n" msg += str(ex) logging.error(msg) return None return inputDataset.name()
def stageOut(): """ _stageOut_ Main function for this module. Loads data from the task and manages the stage out process for a single attempt """ state = TaskState(os.getcwd()) state.loadRunResDB() workflow = WorkflowSpec() workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC']) jobSpecFile = os.environ.get('PRODAGENT_JOBSPEC') jobSpecId = None if jobSpecFile is not None: jobSpec = JobSpec() jobSpec.load(jobSpecFile) jobSpecId = jobSpec.parameters.get('JobName') print workflow print state.taskName() print jobSpecId stageOutFor, override, controls = StageOutUtils.getStageOutConfig( workflow, state.taskName()) toplevelReport = os.path.join(os.environ['PRODAGENT_JOB_DIR'], "FrameworkJobReport.xml") exitCode = 0 # // # // find inputs by locating the task for which we are staging out #// and loading its TaskState for inputTask in stageOutFor: print "Attempting to stage out files for node %s" % inputTask try: inputState = getTaskState(inputTask) msg = "Loaded Input Task: %s " % inputTask except Exception, ex: msg = "Error load for TaskState for task %s" % inputTask msg += "%s\n" % str(ex) inputState = None print msg if inputState == None: # exit with init error # generate failure report in this dir, since cant find # input state dir inputReport = FwkJobReport() inputReport.name = inputTask inputReport.jobSpecId = jobSpecId exitCode = 60311 errRep = inputReport.addError( 60311, "TaskStateError") errRep['Description'] = msg inputReport.status = "Failed" inputReport.exitCode = 60311 updateReport(toplevelReport, inputReport) print "TaskState is None, exiting..." return exitCode try: inputReport = inputState.getJobReport() msg = "Loaded JobReport for Task : %s\n" % inputTask msg += "File: %s\n" % inputState.jobReport except Exception, ex: msg = "Error loading input report : %s" % str(ex) inputReport = None
class ResultsStatus: """ _ResultsStatus_ Object to retrieve and compute the overall state of a Results Workflow """ def __init__(self, config, msgSvcRef, **workflowDetails): self.configuration = config self.msgSvcRef = msgSvcRef self.workflowDetails = workflowDetails self.workflow = workflowDetails['id'] self.workflowFile = workflowDetails['workflow_spec_file'] self.workflowSpec = WorkflowSpec() self.workflowSpec.load(self.workflowFile) self.doMigration = self.configuration.get("MigrateToGlobal", True) self.doInjection = self.configuration.get("InjectToPhEDEx", True) def __call__(self): """ _operator()_ Evaluate the status of this workflow from the WorkflowEntities data and publish any events that are triggered """ if self.processingComplete(): logging.info("Processing Complete for %s" % self.workflow) for dataset in self.unmergedDatasets(): if self.doMigration: logging.debug( "Publishing MigrateToGlobal for %s" % dataset) self.msgSvcRef.publish( "DBSInterface:MigrateDatasetToGlobal", dataset) self.msgSvcRef.commit() if self.doInjection: logging.debug("Publishing PollMigration for %s" % dataset) self.msgSvcRef.publish("StoreResultsAccountant:PollMigration", self.workflowFile, "00:02:00") self.msgSvcRef.commit() Session.commit_all() WEWorkflow.setFinished(self.workflow) WEWorkflow.remove(self.workflow) Session.commit_all() return def processingComplete(self): """ _processingComplete_ look at the processing jobs for the workflow, and return True if all processing jobs are complete """ intermediateDBS = self.workflowSpec.parameters['DBSURL'] outputDataset = self.workflowSpec.outputDatasets()[0].name() allJobs = WEUtils.jobsForWorkflow(self.workflow, "Merge") finishedJobs = WEUtils.jobsForWorkflow(self.workflow, "Merge", "finished") totalProcessing = len(allJobs) totalComplete = len(finishedJobs) logging.info("%s: %s/%s jobs complete" % (self.workflow,totalComplete,totalProcessing)) if totalProcessing == 0: # Protection for non-sensical situation return False if totalComplete < totalProcessing: return False # Check to make sure local DBS knows about all output files try: reader = DBSReader(intermediateDBS) blockList = reader.getFiles(dataset = outputDataset) except: logging.info("Dataset not in DBS yet") return False totalRegistered = 0 for block in blockList: totalRegistered += len(blockList[block]['Files']) logging.info("%s: %s/%s jobs registered" % (self.workflow,totalRegistered,totalProcessing)) if totalRegistered < totalProcessing: return False return True def unmergedDatasets(self): """ _unmergedDatasets_ Retrieve a list of datasets tht can be ForceMerge'd """ extractor = ExtractDatasets() self.workflowSpec.payload.operate(extractor) result = [ x.name() for x in extractor.datasets ] return result
if opt == "--prod-events":
    totalEvents = int(arg)

if workflowFile == None:
    msg = "--prod-workflow not set"
    raise RuntimeError, msg
if workingDir == None:
    msg = "--working-dir not set"
    raise RuntimeError, msg

# //
# // Script wide objects
#//
workflowSpec = WorkflowSpec()
workflowSpec.load(workflowFile)

productionDir = "%s/production" % workingDir
mergeProdDir = "%s/production-merge" % workingDir

if not os.path.exists(productionDir):
    os.makedirs(productionDir)
if not os.path.exists(mergeProdDir):
    os.makedirs(mergeProdDir)

mergeProdSpecs = createMergeJobWorkflow(workflowSpec)

prodFactory = RequestJobFactory(workflowSpec, productionDir, productionEvents)

for mergeDS, mergeSpec in mergeProdSpecs.items():
    mrgSpecFile = "%s/%s.xml" % (mergeProdDir, mergeDS.replace("/", "_"))
    mergeSpec.save(mrgSpecFile)
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWConfig import CMSSWConfig

if workflow == None:
    msg = "Error: --workflow option is required"
    raise RuntimeError, msg

if not os.path.exists(workflow):
    msg = "Cannot find workflow file:\n %s" % workflow
    raise RuntimeError, msg

spec = WorkflowSpec()
spec.load(workflow)


def indexDict(modName, modRef):
    """
    _indexDict_

    Given a PSet like python dictionary, generate an index file from it.
    If it has PSet children, act on them recursively
    """
    result = []
    # //
class RelValStatus: """ _RelValStatus_ Object to retrieve and compute the overall state of a RelVal Workflow """ def __init__(self, config, msgSvcRef, **workflowDetails): self.configuration = config self.msgSvcRef = msgSvcRef self.workflowDetails = workflowDetails self.workflow = workflowDetails['id'] self.workflowFile = workflowDetails['workflow_spec_file'] self.workflowSpec = WorkflowSpec() self.workflowSpec.load(self.workflowFile) self.doMigration = self.configuration.get("MigrateToGlobal", False) self.doInjection = self.configuration.get("InjectToPhEDEx", False) def __call__(self): """ _operator()_ Evaluate the status of this workflow from the WorkflowEntities data and publish any events that are triggered """ processed = False merged = False if self.processingComplete(): logging.info("Processing Complete for %s" % self.workflow) processed = True # // # // publish ForceMerge for datasets #// for dataset in self.unmergedDatasets(): if countOutstandingUnmergedFiles(dataset) > 0: logging.debug("Publishing ForceMerge for %s" % dataset) self.msgSvcRef.publish("ForceMerge", dataset) self.msgSvcRef.commit() Session.commit_all() if self.mergingComplete(): logging.info("Merging Complete for %s" % self.workflow) merged = True # // # // Close Blocks and migrate to global #// Inject them into PhEDEx for dataset in self.mergedDatasets(): if self.doMigration: logging.debug( "Publishing MigrateToGlobal for %s" % dataset) self.msgSvcRef.publish( "DBSInterface:MigrateDatasetToGlobal", dataset) self.msgSvcRef.commit() if self.doInjection: logging.debug( "Publishing PhEDExInjectDataset for %s" % dataset) self.msgSvcRef.publish("PhEDExInjectDataset", dataset) self.msgSvcRef.commit() Session.commit_all() if processed and merged: # // # // All done: close the workflow out #// logging.info("Workflow %s complete" % self.workflow) WEWorkflow.setFinished(self.workflow) WEWorkflow.remove(self.workflow) Session.commit_all() # // # // Generate summary #// self.summariseWorkflow() return def summariseWorkflow(self): """ _summariseWorkflow_ Workflow has been finished, do whatever is required to generate a summary for the jobs and dispatch it to wherever it is needed """ logging.info("Summarising Workflow %s" % self.workflow) # // # // Need input from Data Ops here: #// # // Can gather information from: # // - Job Reports in Cache #// - ProdMon tables # // - WE Tables # // Generate summary HTML? #// Publish to web somewhere? 
pass def processingComplete(self): """ _processingComplete_ look at the processing jobs for the workflow, and return True if all processing jobs are complete """ allJobs = WEUtils.jobsForWorkflow(self.workflow, "Processing") finishedJobs = WEUtils.jobsForWorkflow(self.workflow, "Processing", "finished") totalProcessing = len(allJobs) totalProcComplete = len(finishedJobs) if totalProcComplete < totalProcessing: return False return True def unmergedDatasets(self): """ _unmergedDatasets_ Retrieve a list of datasets tht can be ForceMerge'd """ extractor = ExtractDatasets() self.workflowSpec.payload.operate(extractor) result = [ x.name() for x in extractor.datasets ] return result def mergingComplete(self): """ _mergingComplete_ look at the jobs for the merge jobs for the workflow and return True if all merge jobs are complete """ allMerges = WEUtils.jobsForWorkflow(self.workflow, "Merge") finishedMerges = WEUtils.jobsForWorkflow(self.workflow, "Merge", "finished") totalMerging = len(allMerges) totalMergeComplete = len(finishedMerges) if totalMerging == 0: # no merges in the system => no merge complete return False if totalMergeComplete < totalMerging: return False return True def mergedDatasets(self): """ _mergedDatasets_ Get a list of merged datasets from the workflow that can be used to close blocks, migrate and inject """ mergeWorkflow = createMergeDatasetWorkflow(self.workflowSpec) extractor = ExtractDatasets() mergeWorkflow.payload.operate(extractor) result = [ x.name() for x in extractor.datasets ] return result
def __call__(self, collectPayload): """ _operator(collectPayload)_ Given the dataset and run in the payload, callout to DBS to find the files to be harvested """ msg = "DBSPlugin invoked for %s" % str(collectPayload) logging.info(msg) site = self.args.get("Site", "srm.cern.ch") baseCache = os.path.join(self.args['ComponentDir'], "DBSPlugin") if not os.path.exists(baseCache): os.makedirs(baseCache) datasetCache = os.path.join(baseCache, collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier']) if not os.path.exists(datasetCache): os.makedirs(datasetCache) workflowFile = os.path.join( datasetCache, "%s-%s-%s-DQMHarvest-Workflow.xml" % ( collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier']) ) if not os.path.exists(workflowFile): msg = "No workflow found for dataset: %s\n " % ( collectPayload.datasetPath(),) msg += "Looking up software version and generating workflow..." if self.args.get("OverrideGlobalTag", None) == None: globalTag = findGlobalTagForDataset( self.dbsUrl, collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier'], collectPayload['RunNumber']) else: globalTag = self.args['OverrideGlobalTag'] if self.args.get("OverrideCMSSW", None) != None: cmsswVersion = self.args['OverrideCMSSW'] msg = "Using Override for CMSSW Version %s" % ( self.args['OverrideCMSSW'],) logging.info(msg) else: cmsswVersion = findVersionForDataset( self.dbsUrl, collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier'], collectPayload['RunNumber']) msg = "Found CMSSW Version for dataset/run\n" msg += " Dataset %s Run %s\n" % (collectPayload.datasetPath(), collectPayload['RunNumber']) msg += " CMSSW Version = %s\n " % cmsswVersion logging.info(msg) workflowSpec = createHarvestingWorkflow( collectPayload.datasetPath(), site, self.args['CmsPath'], self.args['ScramArch'], cmsswVersion, globalTag, configFile=self.args['ConfigFile'], DQMServer=self.args['DQMServer'], proxyLocation=self.args['proxyLocation'], DQMCopyToCERN=self.args['DQMCopyToCERN'], doStageOut=self.args['DoStageOut']) workflowSpec.save(workflowFile) msg = "Created Harvesting Workflow:\n %s" % workflowFile logging.info(msg) self.publishWorkflow(workflowFile, workflowSpec.workflowName()) else: msg = "Loading existing workflow for dataset: %s\n " % ( collectPayload.datasetPath(),) msg += " => %s\n" % workflowFile logging.info(msg) workflowSpec = WorkflowSpec() workflowSpec.load(workflowFile) job = {} jobSpec = workflowSpec.createJobSpec() jobName = "%s-%s-%s" % ( workflowSpec.workflowName(), collectPayload['RunNumber'], time.strftime("%H-%M-%S-%d-%m-%y") ) jobSpec.setJobName(jobName) jobSpec.setJobType("Harvesting") jobSpec.parameters['RunNumber'] = collectPayload['RunNumber'] jobSpec.addWhitelistSite(site) jobSpec.payload.operate(DefaultLFNMaker(jobSpec)) jobSpec.payload.cfgInterface.inputFiles.extend( listFilesInRun( DBSReader(self.dbsUrl), collectPayload['PrimaryDataset'], collectPayload['ProcessedDataset'], collectPayload['DataTier'], collectPayload['RunNumber']) ) specCacheDir = os.path.join( datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4)) if not os.path.exists(specCacheDir): os.makedirs(specCacheDir) jobSpecFile = os.path.join(specCacheDir, "%s-JobSpec.xml" % jobName) jobSpec.save(jobSpecFile) job["JobSpecId"] = jobName job["JobSpecFile"] = jobSpecFile job['JobType'] = "Harvesting" job["WorkflowSpecId"] = workflowSpec.workflowName(), job["WorkflowPriority"] = 10 
job["Sites"] = [site] job["Run"] = collectPayload['RunNumber'] job['WorkflowSpecFile'] = workflowFile msg = "Harvesting Job Created for\n" msg += " => Run: %s\n" % collectPayload['RunNumber'] msg += " => Primary: %s\n" % collectPayload['PrimaryDataset'] msg += " => Processed: %s\n" % collectPayload['ProcessedDataset'] msg += " => Tier: %s\n" % collectPayload['DataTier'] msg += " => Workflow: %s\n" % job['WorkflowSpecId'] msg += " => Job: %s\n" % job['JobSpecId'] msg += " => Site: %s\n" % job['Sites'] logging.info(msg) return [job]
class RepackerSetup: """ _RepackerSetup_ Object to manipulate the Configuration files for a repacker job - Extract the details of the repacker job entity stored in the config - Pull in the lumi server information and add it to the config """ def __init__(self, workflowSpec, jobSpec): self.workflowSpec = WorkflowSpec() self.workflowSpec.load(workflowSpec) self.jobSpec = JobSpec() self.jobSpec.load(jobSpec) taskState = TaskState(os.getcwd()) taskState.loadRunResDB() jobSpecFinder = NodeFinder(taskState.taskName()) self.jobSpec.payload.operate(jobSpecFinder) self.jobSpecNode = jobSpecFinder.result workflowFinder = NodeFinder(taskState.taskName()) self.workflowSpec.payload.operate(workflowFinder) self.workflowNode = workflowFinder.result self.run = None self.lumis = [] self.streamerFiles = [] self.activeDatasets = [] def unpackJobEntity(self): """ _unpackJobEntity_ Get the StreamerJobEntity from the JobSpec node """ repackJobEntity = self.jobSpecNode.cfgInterface.extensions.get('Streamer', None) if repackJobEntity == None: msg = "No StreamerJobEntity in JobSpec configuration\n" msg += "This is required for repacker jobs\n" raise RuntimeError, msg # Get run and lumi numbers for this job self.run = repackJobEntity.data['runNumber'] self.lumis = repackJobEntity.data['lumiSections'] print "Repacker Job Handling Run:%s\n LumiSections: %s\n" % (self.run,self.lumis) # Sort streamer input by lumi ID for time ordering self.streamerFiles = sortByValue(repackJobEntity.data['streamerFiles']) msg = "Streamer Files for this job are:\n" for strmr in self.streamerFiles: msg += " %s\n" % strmr print msg # Get list of active datasets for this job ## self.activeDatasets = repackJobEntity.data['activeOutputModules'] ## msg = "This Job Will repack datasets:\n" ## for dataset in self.activeDatasets: ## msg += " %s\n" % dataset ## print msg return def backupPSet(self,filename,process): """ _backupPSet_ Write a backup copy of the current PSet to disk. """ print "Wrote current configurations as %s" % filename handle = open(filename, 'w') handle.write("import pickle\n") handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(process)) handle.write("process = pickle.loads(pickledCfg)\n") handle.close() return def importAndBackupProcess(self): """ _importAndBackupProcess_ Try to import the process object for the job, which is contained in PSet.py and save a backup copy of it. """ try: from PSet import process except ImportError, ex: msg = "Failed to import PSet module containing cmsRun Config\n" msg += str(ex) raise RuntimeError, msg print "PSet.py imported" self.backupPSet("PSetPreRepack.log",process) return process
class JobSpecExpander: def __init__(self, jobSpecFile): self.jobSpec = JobSpec() self.jobSpec.load(jobSpecFile) self.taskState = TaskState(os.getcwd()) self.taskState.loadRunResDB() self.workflowSpec = WorkflowSpec() self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"]) self.config = self.taskState.configurationDict() finder = NodeFinder(self.taskState.taskName()) self.jobSpec.payload.operate(finder) self.jobSpecNode = finder.result wffinder = NodeFinder(self.taskState.taskName()) self.workflowSpec.payload.operate(wffinder) self.workflowNode = wffinder.result tier0Merge = self.workflowSpec.parameters.get("Tier0Merge", "False") if self.jobSpecNode.jobType != "Merge" or tier0Merge == "True": if self.config.has_key('Configuration'): #try: self.createPSet() #except Exception, ex: # msg = "Unable to generate cmsRun Config from JobSpec:\n" # msg += str(ex) # print msg # badfile = open("exit.status", 'w') # badfile.write("10040") # badfile.close() else: # // # // Merge job #// self.createMergePSet() # do after pset created to get correct input files self.setJobDetails() if self.config.has_key('UserSandbox'): self.userSandbox() def handleInputLink(self, config, inpLink): """ _handleInputLink_ Generate the information for the input link between this task and the task specified """ msg = "Input Link Detected:\n" for k, v in inpLink.items(): msg += " %s = %s\n" % (k, v) print msg inputTask = getTaskState(inpLink['InputNode']) if inputTask == None: msg = "Unable to create InputLink for task: %s\n" % ( inpLink['InputNode'],) msg += "Input TaskState could not be retrieved..." raise RuntimeError, msg inputTask.loadJobReport() inputReport = inputTask.getJobReport() if inputReport == None: msg = "Unable to create InputLink for task: %s\n" % ( inpLink['InputNode'],) msg += "Unable to load input job report file" raise RuntimeError, msg # add files to override catalog inputFileList = [] tfc = None for file in inputReport.files: if not file['ModuleLabel'] == inpLink['OutputModule']: continue # link to file via lfn (in tfc) if link isn't standalone and we # have a valid lfn. Else refer to file via pfn if not inpLink['AppearStandalone'] and \ file.get('LFN', None) not in (None, '', 'None'): if not tfc: tfc = TrivialFileCatalog.TrivialFileCatalog() inputFileList.append(file['LFN']) tfc.addLfnToPfnRule('override', file['LFN'], file['PFN']) else: inputFileList.append("file:%s" % file['PFN']) if tfc: print "Creating override tfc, contents below" print str(tfc) tfc.write(os.path.join(os.getcwd(), 'override_catalog.xml')) if inpLink['InputSource'] == "source": # // # // feed into main source #// config.inputFiles = inputFileList if tfc: config.inputOverrideCatalog = os.path.join(os.getcwd(), 'override_catalog.xml') msg = "Input Link created to input source for files:\n" for f in inputFileList: msg += " %s\n" % f print msg return # // # // Need to add to secondary source with name provided #// raise NotImplementedError, "Havent implemented secondary source input links at present..." 
def localCustomization(self, config, merge = False): """ Apply site specific customizations to the config """ site_config = self.taskState.getSiteConfig() self.ioCustomization(config, site_config.io_config, merge) def ioCustomization(self, config, custom_config, merge = False): """ Apply site specific io customizations """ # Don't do anything if no customization or job has no input files if not custom_config or (merge is False and not config.inputFiles): return import re version = lambda x: tuple(int(x) for x in re.compile('(\d+)').findall(x)) cmssw_version = version(os.environ['CMSSW_VERSION']) # Only implemented in CMSSW_2_1_8 and above if cmssw_version < (2, 1, 8): return print "Site specific IO parameters will be used:" # cacheSize is a property of InputSource cache_size = custom_config.get('cacheSize', None) if cache_size: # Merge pset creates process on fly so can't use CMSSWConfig object if merge: from ProdCommon.CMSConfigTools.ConfigAPI.InputSource import InputSource inputSource = InputSource(config.source) inputSource.setCacheSize(cache_size) else: config.sourceParams['cacheSize'] = cache_size if merge: from FWCore.ParameterSet.Modules import Service config.add_(Service('AdaptorConfig')) for param in custom_config: print " %s %s" % (param, custom_config[param]) if param == 'cacheSize': continue if merge: import FWCore.ParameterSet.Types as CfgTypes adaptor = config.services['AdaptorConfig'] setattr(adaptor, param, CfgTypes.untracked(CfgTypes.string(str(custom_config[param])))) else: config.tFileAdaptorConfig[param] = custom_config[param] return def createPSet(self): """ _createPSet_ Create the PSet cfg File """ cfgFile = self.config['Configuration'].get("CfgFile", "PSet.py")[0] cfgFile = str(cfgFile) self.jobSpecNode.loadConfiguration() self.jobSpecNode.cfgInterface.rawCfg = self.workflowNode.cfgInterface.rawCfg # taken from cmssw environment # pylint: disable-msg=F0401 import FWCore.ParameterSet.Types as CfgTypes # pylint: enable-msg=F0401 workingDir = os.path.join(os.getcwd(), 'prestage') if os.path.exists(workingDir + '/prestageTFC.xml'): rawCfg = pickle.loads(self.jobSpecNode.cfgInterface.rawCfg) rawCfg.source.overrideCatalog = CfgTypes.untracked(CfgTypes.string('trivialcatalog_file:%s/prestageTFC.xml?protocol=local-stage-in' % workingDir)) self.jobSpecNode.cfgInterface.rawCfg = pickle.dumps(rawCfg) # Apply site specific customizations self.localCustomization(self.jobSpecNode.cfgInterface) for inpLink in self.jobSpecNode._InputLinks: # // # // We have in-job input links to be resolved #// self.handleInputLink(self.jobSpecNode.cfgInterface, inpLink) cmsProcess = self.jobSpecNode.cfgInterface.makeConfiguration() pycfgDump = open("PyCfgFileDump.log", 'w') try: pycfgDump.write(cmsProcess.dumpPython()) except Exception, ex: msg = "Error writing python format cfg dump:\n" msg += "%s\n" % str(ex) msg += "This needs to be reported to the framework team" pycfgDump.write(msg) pycfgDump.close() handle = open(cfgFile, 'w') handle.write("import pickle\n") handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(cmsProcess)) handle.write("process = pickle.loads(pickledCfg)\n") handle.close() return
class OfflineDQMSetup: """ _OfflineDQMSetup_ Generate the PSet for the job on the fly """ def __init__(self): self.jobSpec = JobSpec() self.jobSpec.load(os.environ['PRODAGENT_JOBSPEC']) self.taskState = TaskState(os.getcwd()) self.taskState.loadRunResDB() self.workflowSpec = WorkflowSpec() self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"]) self.config = self.taskState.configurationDict() finder = NodeFinder(self.taskState.taskName()) self.jobSpec.payload.operate(finder) self.jobSpecNode = finder.result wffinder = NodeFinder(self.taskState.taskName()) self.workflowSpec.payload.operate(wffinder) self.workflowNode = wffinder.result self.inputFiles = self.jobSpecNode.cfgInterface.inputFiles self.globalTag = self.jobSpecNode.cfgInterface.conditionsTag self.inputDataset = self.jobSpecNode._InputDatasets[0] self.runNumber = self.jobSpec.parameters['RunNumber'] self.scenario = self.jobSpec.parameters.get('Scenario', 'relvalmc') self.refHistKey = self.jobSpec.parameters.get('RefHistKey', None) def __call__(self): """ _operator()_ Invoke the setup tool """ msg = "Creating Harvesting Configuration for:\n" msg += " => Dataset: %s\n" % self.inputDataset.name() msg += " => Run Number: %s\n" % self.runNumber msg += " => Global Tag: %s\n" % self.globalTag msg += " => Input Files:\n" for inputfile in self.inputFiles: msg += " => %s\n" % inputfile print msg process = self.importConfigurationLibrary() pycfgDump = open("PyCfgFileDump.log", 'w') try: pycfgDump.write(process.dumpPython()) except Exception, ex: msg = "Error writing python format cfg dump:\n" msg += "%s\n" % str(ex) msg += "This needs to be reported to the framework team" pycfgDump.write(msg) pycfgDump.close() # // # // Save the edited config as PSet.py #// handle = open("PSet.py", 'w') handle.write("import pickle\n") handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(process)) handle.write("process = pickle.loads(pickledCfg)\n") handle.close() print "Wrote PSet.py for harvesting" return
import sys, os

from ProdAgentDB.Config import defaultConfig as dbConfig
from ProdCommon.Database import Session
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdAgentCore.Configuration import loadProdAgentConfiguration
from MergeSensor.MergeSensorDB import MergeSensorDB
from JobQueue.JobQueueDB import JobQueueDB
import ProdAgent.WorkflowEntities.Aux as WEAux
import ProdAgent.WorkflowEntities.Workflow as WEWorkflow

workflow = sys.argv[1]

workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)

# //
# // Clean out job cache
#//
config = loadProdAgentConfiguration()
compCfg = config.getConfig("JobCreator")
creatorCache = os.path.expandvars(compCfg['ComponentDir'])
workflowCache = os.path.join(creatorCache, workflowSpec.workflowName())
if os.path.exists(workflowCache):
    os.system("/bin/rm -rf %s" % workflowCache)

Session.set_database(dbConfig)
#!/usr/bin/env python

import os
import pickle

from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWAPILoader import CMSSWAPILoader
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.MCPayloads.DatasetTools import getOutputDatasetsWithPSet

specfile = "/uscms/home/gutsche/CSA08-JetET110-CSA08_S43_S43_rereco_may19_PIC_v1-Workflow.xml"
rawCfgFile = "%s.raw.cfg" % os.path.basename(specfile)
origCfgFile = "%s.orig.cfg" % os.path.basename(specfile)
dbsCfgFile = "%s.dbs.cfg" % os.path.basename(specfile)

spec = WorkflowSpec()
spec.load(specfile)

rawCfg = spec.payload.cfgInterface.rawCfg
originalCfg = spec.payload.cfgInterface.originalCfg
dbsDatasets = getOutputDatasetsWithPSet(spec.payload)

handle = open(dbsCfgFile, 'w')
handle.write(dbsDatasets[0]['PSetContent'])
handle.close()

handle = open(origCfgFile, 'w')
handle.write(originalCfg)
handle.close()

loader = CMSSWAPILoader(os.environ['SCRAM_ARCH'],
class LogArchMgr: def __init__(self): self.state = TaskState(os.getcwd()) self.state.loadRunResDB() self.state.loadJobSpecNode() # // # // check for store fail settings #// self.workflow = WorkflowSpec() self.workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC']) self.doStoreFail = self.workflow.parameters.get("UseStoreFail", False) if str(self.doStoreFail).lower() == "true": self.doStoreFail = True self.config = self.state.configurationDict() self.inputTasks = self.config.get("InputTasks", []) # #TODO: not really sure this is correct, i would think i should # # take report from stageOut but that is missing if cmsRun fails # # what if cmsRun one is missing - do i want to generate an empty one? # self.inputReport = getTaskState(self.inputTasks[0]).getJobReport() # iterate over input tasks (in reverse order) # find first one with a fjr self.inputTask, self.inputReport = None, None for taskName in self.inputTasks[::-1]: task = getTaskState(taskName) report = task.getJobReport() if report is None: continue self.inputTask = task self.inputReport = report break # if got no valid fjr from previous tasks - # something must have gone wrong earlier - make our own # may need more things set here to make reports mergeable if self.inputReport is None: self.inputTask = self.state self.inputReport = FwkJobReport() self.regexps = self.config.get("LogMatchRegexp", []) self.doStageOut = True doingStageOut = self.config.get("DoStageOut", []) if len(doingStageOut) > 0: control = doingStageOut[-1] if control == "False": self.doStageOut = False self.workflowSpecId = self.config['WorkflowSpecID'][0] self.jobSpecId = self.state.jobSpecNode.jobName self.compRegexps = [] for regexp in self.regexps: self.compRegexps.append(re.compile(regexp)) # TODO: These should be pulled in from the workflow now not the config thing self.override = False soParams = self.config.get('StageOutParameters', {}) self.override = soParams.has_key("Override") self.overrideParams = {} if self.override: overrideConf = self.config['StageOutParameters']['Override'] self.overrideParams = { "command" : None, "option" : None, "se-name" : None, "lfn-prefix" : None, } try: self.overrideParams['command'] = overrideConf['command'][0] self.overrideParams['se-name'] = overrideConf['se-name'][0] self.overrideParams['lfn-prefix'] = overrideConf['lfn-prefix'][0] except StandardError, ex: msg = "Unable to extract Override parameters from config:\n" msg += str(self.config['StageOutParameters']) raise StageOutInitError(msg) if overrideConf.has_key('option'): if len(overrideConf['option']) > 0: self.overrideParams['option'] = overrideConf['option'][-1] else: self.overrideParams['option'] = ""
raise RuntimeError, msg

for key, val in override.items():
    if val == None:
        msg = "--%s Option Not Provided\n" % key
        msg += "This option is required\n"
        raise RuntimeError, msg

if workflowSpec != None:
    if not os.path.exists(workflowSpec):
        msg = "Workflow Spec file Not Found:\n%s\n" % workflowSpec
        raise RuntimeError, msg
    spec = WorkflowSpec()
    spec.load(workflowSpec)
    specFile = workflowSpec

if jobSpec != None:
    if not os.path.exists(jobSpec):
        msg = "Job Spec file Not Found:\n%s\n" % jobSpec
        raise RuntimeError, msg
    spec = JobSpec()
    spec.load(jobSpec)
    specFile = jobSpec

allNames = listAllNames(spec.payload)
if stageOutNode not in allNames:
    msg = "Error: Cannot find Node named %s in spec\n" % stageOutNode
    msg += "Node names are: %s" % allNames