class JobSpecExpander:

    def __init__(self, jobSpecFile):
        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpecFile)
        self.taskState = TaskState(os.getcwd())
        self.taskState.loadRunResDB()
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])

        self.config = self.taskState.configurationDict()

        finder = NodeFinder(self.taskState.taskName())
        self.jobSpec.payload.operate(finder)
        self.jobSpecNode = finder.result

        wffinder = NodeFinder(self.taskState.taskName())
        self.workflowSpec.payload.operate(wffinder)
        self.workflowNode = wffinder.result

        if self.jobSpecNode.jobType != "Merge":
            if self.config.has_key('Configuration'):
                try:
                    self.createPSet()
                except Exception, ex:
                    msg = "Unable to generate cmsRun Config from JobSpec:\n"
                    msg += str(ex)
                    print msg
                    badfile = open("exit.status", 'w')
                    badfile.write("10040")
                    badfile.close()
        else:
            #  //
            # // Merge job
            #//
            self.createMergePSet()
def createCleanupWorkflowSpec():
    """
    _createCleanupWorkflowSpec_

    Create a generic cleanup WorkflowSpec definition
    that can be used to generate a sandbox for cleanup jobs

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")

    workflow = WorkflowSpec()
    workflow.setWorkflowName("CleanUp-%s" % timestamp)
    workflow.setActivity("CleanUp")
    workflow.setRequestCategory("mc-cleanup")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "CleanUp"

    cleanUp = workflow.payload
    cleanUp.name = "cleanUp1"
    cleanUp.type = "CleanUp"
    cleanUp.application["Project"] = ""
    cleanUp.application["Version"] = ""
    cleanUp.application["Architecture"] = ""
    cleanUp.application["Executable"] = "RuntimeCleanUp.py"  # binary name
    cleanUp.configuration = ""
    cleanUp.cfgInterface = None

    return workflow
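# Usage sketch: materialise the generic cleanup spec to disk. The save()
# and workflowName() calls are the same WorkflowSpec methods used by the
# other snippets in this collection; the output path is illustrative.
cleanupSpec = createCleanupWorkflowSpec()
cleanupSpec.save("%s-Workflow.xml" % cleanupSpec.workflowName())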
def load(self, improvNode):
    """
    _load_

    Extract information for this object from the improv instance provided

    """
    wfQuery = IMProvQuery("/RequestSpec/WorkflowSpec")
    wfnode = wfQuery(improvNode)[0]
    wfspec = WorkflowSpec()
    wfspec.loadFromNode(wfnode)
    self.workflow = wfspec

    policyQuery = IMProvQuery("/RequestSpec/Policies/*")
    detailQuery = IMProvQuery("/RequestSpec/RequestDetails/*")
    preferredPAQuery = IMProvQuery("/RequestSpec/PreferredPA")

    policies = policyQuery(improvNode)
    details = detailQuery(improvNode)
    preferredPAs = preferredPAQuery(improvNode)

    for policy in policies:
        self.policies[str(policy.name)] = str(policy.chardata)

    for detail in improvNode.attrs.keys():
        self.requestDetails[detail] = str(improvNode.attrs[detail])

    for preferredPA in preferredPAs:
        self.preferredPAs[str(preferredPA.attrs['id'])] = \
            str(preferredPA.attrs['priority'])

    return
def getCMSSoft(work, reverse=False):
    """
    Open each workflow file and extract its CMSSW version.
    If reverse is True, return a map from CMSSW version to the real
    workflow names; otherwise map workflow name to CMSSW version.
    """
    new_work = {}
    workflowSpec = WorkflowSpec()
    for fil in work:
        try:
            workflowSpec.load(fil)
            cmssw = workflowSpec.payload.application['Version']
            name = workflowSpec.parameters['WorkflowName']
            if reverse:
                if not new_work.has_key(cmssw):
                    new_work[cmssw] = []
                new_work[cmssw].append(name)
            else:
                new_work[name] = cmssw
        except:
            # something went wrong while handling this file; report and move on
            msg = "WorkflowConstraints getCMSSoft: something went wrong " \
                  "while handling file " + fil
            print(msg)
    return new_work
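# Usage sketch (file names are illustrative): build the map from each CMSSW
# version to the workflow names that use it.
versionToWorkflows = getCMSSoft(["wf1-Workflow.xml", "wf2-Workflow.xml"],
                                reverse=True)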
def add(self, workflowFile):
    """
    _add_

    Add a dataset to the list of watched datasets.

    Arguments:

      workflowFile -- the workflow specification file

    Return:

      the datasetId

    """
    # read the WorkflowSpecFile
    try:
        wfile = WorkflowSpec()
        wfile.load(workflowFile)

    # wrong dataset file
    except Exception, msg:
        raise InvalidDataset, \
            "Error loading workflow specifications from %s" % workflowFile
def makeJobs(self, testInstance):
    """
    _makeJobs_

    Create Job Specs for the test instance provided

    """
    logging.info("Creating Jobs for test %s at site %s" % (
        testInstance['Name'], testInstance['Site']))
    testName = testInstance['WorkflowSpecId']
    specInstance = WorkflowSpec()
    specInstance.load(testInstance['WorkflowSpecFile'])

    if testInstance['InputDataset'] == None:
        initialRun = self.jobCounts.get(testInstance['Name'], 1)
        factory = RequestJobFactory(
            specInstance,
            testInstance['WorkingDir'],
            testInstance['TotalEvents'],
            InitialRun = initialRun,
            EventsPerJob = testInstance['EventsPerJob'],
            Sites = [testInstance['Site']])
        jobsList = factory()
        self.jobCounts[testInstance['Name']] += len(jobsList)
    else:
        factory = DatasetJobFactory(
            specInstance,
            testInstance['WorkingDir'],
            specInstance.parameters['DBSURL'])
        jobsList = factory()
        self.jobCounts[testInstance['Name']] += len(jobsList)

    msg = "Created %s jobs:\n" % len(jobsList)
    for job in jobsList:
        jobSpecFile = job['JobSpecFile']
        jobSpecId = job['JobSpecId']
        msg += "  %s\n" % jobSpecId
        testInstance['JobSpecs'][jobSpecId] = jobSpecFile
    logging.info(msg)
    return
def GoodWorkflow(workflow):
    """
    Check if workflow can be loaded
    """
    RequestDir, firstrun = getRequestInjectorConfig()
    workflowCache = "%s/WorkflowCache" % RequestDir
    workflowSpec = WorkflowSpec()
    try:
        workflowSpec.load(workflow)
    except:
        return False
    return True
def loadWorkflow(self, specFile):
    """
    _loadWorkflow_

    Helper method, since every plugin will have to do something
    with a workflow

    """
    spec = WorkflowSpec()
    try:
        spec.load(specFile)
    except Exception, ex:
        msg = "Unable to read workflow spec file:\n%s\n" % specFile
        msg += str(ex)
        raise RuntimeError, msg
    # hand the loaded spec back to the caller
    return spec
class FactoryInterface:
    """
    _FactoryInterface_

    JobSpec Factory Interface definition & common utils for
    all job spec factory generators

    """
    def __init__(self, workflowSpec):
        # or use isinstance(WorkflowSpec) if need to include sub classes
        if workflowSpec.__class__ is WorkflowSpec:
            self.workflow = workflowSpec
        else:
            self.workflow = WorkflowSpec()
            self.workflow.load(workflowSpec)
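# Usage sketch for the flexible constructor above: it accepts either an
# already-built WorkflowSpec instance or a path to a spec file on disk
# (both values below are illustrative assumptions).
existingSpec = WorkflowSpec()
factoryA = FactoryInterface(existingSpec)         # instance passed straight through
factoryB = FactoryInterface("Test-Workflow.xml")  # path: loaded via WorkflowSpec.load()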
def createProductionWorkflow(prodName, cmsswVersion, cfgFile=None,
                             category="mc", **args):
    """
    _createProductionWorkflow_

    Create a Production style workflow, ie generation of new events

    """
    timestamp = int(time.time())
    if args.get("PyCfg", None) == None:
        if cfgFile == None:
            msg = "Error: No Cfg File or python cfg file provided to createProductionWorkflow"
            raise RuntimeError, msg
        pycfgFile = createPythonConfig(cfgFile)
        pycfgFileContent = file(pycfgFile).read()
    else:
        pycfgFileContent = args['PyCfg']

    if args.get("PSetHash", None) == None:
        realPSetHash = createPSetHash(cfgFile)
    else:
        realPSetHash = args['PSetHash']

    #  //
    # // Create a new WorkflowSpec and set its name
    #//
    spec = WorkflowSpec()
    workflowname = "%s__%s-%s-%s-%s" % (
        prodName, cmsswVersion,
        args.get("processingLabel", "Test07"),
        args.get("physicsGroup", "NoPhysicsGroup"),
        timestamp)
    spec.setWorkflowName(workflowname)
    spec.setRequestCategory(category)
    spec.setRequestTimestamp(timestamp)

    cmsRun = spec.payload
    populateCMSRunNode(cmsRun, "cmsRun1", cmsswVersion, pycfgFileContent,
                       realPSetHash, timestamp, prodName,
                       physicsGroup=args.get("physicsGroup", "NoPhysicsGroup"),
                       processingLabel=args.get("processingLabel", "Test07"),
                       fakeHash=args.get("FakeHash", False))

    addStageOutNode(cmsRun, "stageOut1")
    generateFilenames(spec)

    return spec
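# Usage sketch: if the expanded python config and its PSet hash are already
# available (pycfgContent and psetHash are assumed to come from elsewhere),
# passing them as keyword args skips the cfg-file parsing branch above.
spec = createProductionWorkflow("MinBias", "CMSSW_2_0_8",
                                PyCfg=pycfgContent,
                                PSetHash=psetHash,
                                physicsGroup="Individual",
                                processingLabel="Test07")
spec.save("%s-Workflow.xml" % spec.workflowName())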
def __init__(self, config, msgSvcRef, **workflowDetails):
    self.configuration = config
    self.msgSvcRef = msgSvcRef
    self.workflowDetails = workflowDetails
    self.workflow = workflowDetails['id']
    self.workflowFile = workflowDetails['workflow_spec_file']
    self.workflowSpec = WorkflowSpec()
    self.workflowSpec.load(self.workflowFile)
    self.doMigration = self.configuration.get("MigrateToGlobal", True)
    self.doInjection = self.configuration.get("InjectToPhEDEx", True)
def createLogCollectorWorkflowSpec(wf):
    """
    _createLogCollectorWorkflowSpec_

    Create a generic LogArchive WorkflowSpec definition

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")

    workflow = WorkflowSpec()
    workflow.setWorkflowName("LogCollect-%s" % timestamp)
    workflow.setActivity("LogCollect")
    workflow.setRequestCategory("logcollect")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters["WorkflowType"] = "LogCollect"

    logArchive = workflow.payload
    logArchive.name = "logCollect1"
    logArchive.type = "LogCollect"
    # TODO: remove this?
    # logArchive.workflow = wf
    logArchive.application["Project"] = ""
    logArchive.application["Version"] = ""
    logArchive.application["Architecture"] = ""
    logArchive.application["Executable"] = "RuntimeLogCollector.py"  # binary name
    logArchive.configuration = ""
    logArchive.cfgInterface = None

    # set stageOut override
    # cfg = IMProvNode("config")
    # stageOut = IMProvNode("StageOutParameters")
    # cfg.addNode()
    # WorkflowTools.addStageOutNode(logArchive, "StageOut1")
    # WorkflowTools.addStageOutOverride(logArchive, stageOutParams['command'],
    #                                   stageOutParams['option'],
    #                                   stageOutParams['se-name'],
    #                                   stageOutParams['lfnPrefix'])

    return workflow
def createWorkflow(self, runNumber, primaryDataset,
                   processedDataset, dataTier):
    """
    _createWorkflow_

    Create a workflow for a given run and primary dataset.  If the
    workflow has been created previously, load it and use it.

    """
    jobCache = os.path.join(self.args["ComponentDir"], "T0ASTPlugin",
                            "Run%s" % runNumber)
    if not os.path.exists(jobCache):
        os.makedirs(jobCache)

    workflowSpecFileName = "DQMHarvest-Run%s-%s-workflow.xml" % (runNumber, primaryDataset)
    workflowSpecPath = os.path.join(jobCache, workflowSpecFileName)

    if os.path.exists(workflowSpecPath):
        msg = "Loading existing workflow for dataset: %s\n " % primaryDataset
        msg += " => %s\n" % workflowSpecPath
        logging.info(msg)
        workflowSpec = WorkflowSpec()
        workflowSpec.load(workflowSpecPath)
        return (workflowSpec, workflowSpecPath)

    msg = "No workflow found for dataset: %s\n " % primaryDataset
    msg += "Looking up software version and generating workflow..."

    recoConfig = self.t0astWrapper.listRecoConfig(runNumber, primaryDataset)

    if not recoConfig["DO_RECO"]:
        logging.info("RECO disabled for dataset %s" % primaryDataset)
        return (None, None)

    globalTag = self.args.get("OverrideGlobalTag", None)
    if globalTag == None:
        globalTag = recoConfig["GLOBAL_TAG"]

    cmsswVersion = self.args.get("OverrideCMSSW", None)
    if cmsswVersion == None:
        cmsswVersion = recoConfig["CMSSW_VERSION"]

    datasetPath = "/%s/%s/%s" % (primaryDataset, processedDataset, dataTier)
    workflowSpec = createHarvestingWorkflow(datasetPath, self.site,
                                            self.args["CmsPath"],
                                            self.args["ScramArch"],
                                            cmsswVersion, globalTag,
                                            configFile=self.args["ConfigFile"],
                                            DQMServer=self.args['DQMServer'],
                                            proxyLocation=self.args['proxyLocation'],
                                            DQMCopyToCERN=self.args['DQMCopyToCERN'],
                                            doStageOut=self.args['DoStageOut'])

    workflowSpec.save(workflowSpecPath)
    msg = "Created Harvesting Workflow:\n %s" % workflowSpecPath
    logging.info(msg)
    self.publishWorkflow(workflowSpecPath, workflowSpec.workflowName())
    return (workflowSpec, workflowSpecPath)
def makeWorkflow(self):
    """
    _makeWorkflow_

    Generate a workflow.  If the self.configFile parameter has been set
    this will attempt to load the config from file, otherwise it will
    create an empty process object which will get filled in by the
    runtime script.

    """
    self.timestamp = int(time.time())
    self.workflow = WorkflowSpec()
    self.workflowName = "AlcaSkim-Run%s-%s" % \
        (self.run, self.primaryDataset)
    self.workflow.setWorkflowName(self.workflowName)
    self.workflow.setRequestCategory("data")
    self.workflow.setRequestTimestamp(self.timestamp)
    self.workflow.parameters["WorkflowType"] = "Processing"
    self.workflow.parameters["ProdRequestID"] = self.run
    self.workflow.parameters["RunNumber"] = self.run
    self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"]
    self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"]
    self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

    self.cmsRunNode = self.workflow.payload
    self.cmsRunNode.name = "cmsRun1"
    self.cmsRunNode.type = "CMSSW"
    self.cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
    self.cmsRunNode.application["Executable"] = "cmsRun"
    self.cmsRunNode.application["Project"] = "CMSSW"
    self.cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

    inputDataset = self.cmsRunNode.addInputDataset(self.primaryDataset,
                                                   self.parentProcessedDataset)
    inputDataset["DataTier"] = "RECO"

    if self.configFile == None:
        self.loadProcessFromFramework()
    else:
        self.loadProcessFromFile()

    self.setupOutputModules()

    WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")
    WorkflowTools.generateFilenames(self.workflow)

    return self.workflow
def createJobSpec(jobSpecId, workflowSpecFile, filename, runNumber,
                  eventCount, firstEvent=None, saveString=False,
                  loadString=True):
    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)

    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
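# Usage sketch (argument values are illustrative): build one Processing
# JobSpec from a workflow spec file on disk; loadString=False because a
# path is passed rather than the spec XML content itself.
createJobSpec("ProcJob-1", "Test-Workflow.xml", "ProcJob-1-JobSpec.xml",
              runNumber=1, eventCount=100, firstEvent=1, loadString=False)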
def __init__(self, jobSpecFile):
    self.jobSpec = JobSpec()
    self.jobSpec.load(jobSpecFile)
    self.taskState = TaskState(os.getcwd())
    self.taskState.loadRunResDB()
    self.workflowSpec = WorkflowSpec()
    self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])

    self.config = self.taskState.configurationDict()

    finder = NodeFinder(self.taskState.taskName())
    self.jobSpec.payload.operate(finder)
    self.jobSpecNode = finder.result

    wffinder = NodeFinder(self.taskState.taskName())
    self.workflowSpec.payload.operate(wffinder)
    self.workflowNode = wffinder.result

    tier0Merge = self.workflowSpec.parameters.get("Tier0Merge", "False")

    if self.jobSpecNode.jobType != "Merge" or tier0Merge == "True":
        if self.config.has_key('Configuration'):
            #try:
            self.createPSet()
            #except Exception, ex:
            #    msg = "Unable to generate cmsRun Config from JobSpec:\n"
            #    msg += str(ex)
            #    print msg
            #    badfile = open("exit.status", 'w')
            #    badfile.write("10040")
            #    badfile.close()
    else:
        #  //
        # // Merge job
        #//
        self.createMergePSet()

    # do after pset created to get correct input files
    self.setJobDetails()

    if self.config.has_key('UserSandbox'):
        self.userSandbox()
def __init__(self):
    self.jobSpec = JobSpec()
    self.jobSpec.load(os.environ['PRODAGENT_JOBSPEC'])
    self.taskState = TaskState(os.getcwd())
    self.taskState.loadRunResDB()
    self.workflowSpec = WorkflowSpec()
    self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])

    self.config = self.taskState.configurationDict()

    finder = NodeFinder(self.taskState.taskName())
    self.jobSpec.payload.operate(finder)
    self.jobSpecNode = finder.result

    wffinder = NodeFinder(self.taskState.taskName())
    self.workflowSpec.payload.operate(wffinder)
    self.workflowNode = wffinder.result

    self.inputFiles = self.jobSpecNode.cfgInterface.inputFiles
    self.globalTag = self.jobSpecNode.cfgInterface.conditionsTag
    self.inputDataset = self.jobSpecNode._InputDatasets[0]
    self.runNumber = self.jobSpec.parameters['RunNumber']
    self.scenario = self.jobSpec.parameters.get('Scenario', 'relvalmc')
    self.refHistKey = self.jobSpec.parameters.get('RefHistKey', None)
class WorkflowMaker:
    """
    _WorkflowMaker_

    Basic MC workflow maker for PR to use to create workflow spec files.

    """
    def __init__(self, requestId, channel, label):
        self.requestId = requestId
        self.group = None
        self.label = label
        self.timestamp = int(time.time())
        self.channel = channel
        self.cmsswVersions = []
        self.configurations = []
        self.psetHashes = {}
        self.origCfgs = {}
        self.acquisitionEra = None
        self.processingString = None
        self.processingVersion = None
        self.conditions = None

        # turn on use of proper naming convention for datasets
        # should be made the default soon, lets deprecate all the old crap
        self.useProperNamingConventions = False

        self.options = {}
        self.options.setdefault('FakeHash', False)

        # Should we use another attribute for setting the output dataset
        # status in DBS?
        self.outputDatasetStatus = 'VALID'

        self.inputDataset = {}
        self.inputDataset['IsUsed'] = False
        self.inputDataset['DatasetName'] = None
        self.inputDataset['Primary'] = None
        self.inputDataset['Processed'] = None
        self.inputDataset['DataTier'] = None
        #  //
        # // Extra controls over input dataset if required
        #//
        self.inputDataset['SplitType'] = None
        self.inputDataset['SplitSize'] = None
        self.inputDataset['OnlySites'] = None
        self.inputDataset['OnlyBlocks'] = None
        self.inputDataset['OnlyClosedBlocks'] = True
        #  //
        # // Pileup Dataset controls
        #//
        self.pileupDatasets = []
        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflowName = "%s-%s-%s" % (label, channel, requestId)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("mc")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters['RequestLabel'] = self.label
        self.workflow.parameters['ProdRequestID'] = self.requestId

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"

        self.cmsRunNodes = [self.cmsRunNode]
        self.saveOutputFor = []

    def chainCmsRunNode(self, stageOutIntermediates=False, *outputModules):
        """
        append a cmsRun config to the current cmsRun node and chain them
        """
        if stageOutIntermediates:  # Do we want to keep cmsRunNode's products?
            self.saveOutputFor.append(self.cmsRunNode.name)
        newnode = self.cmsRunNode.newNode("cmsRun%s" %
                                          (len(self.cmsRunNodes) + 1))
        newnode.type = "CMSSW"
        if not outputModules:
            outputModules = self.configurations[-1].outputModules.keys()
        for outmodule in outputModules:
            newnode.addInputLink(self.cmsRunNode.name, outmodule,
                                 'source',
                                 AppearStandalone=not stageOutIntermediates)
        self.cmsRunNode = newnode
        self.cmsRunNodes.append(newnode)

    def changeCategory(self, newCategory):
        """
        _changeCategory_

        Change the workflow category from the default mc
        that appears in the LFNs

        """
        self.workflow.setRequestCategory(newCategory)
        return

    def setAcquisitionEra(self, era):
        """
        _setAcquisitionEra_

        Sets the AcquisitionEra in the workflow

        """
        self.workflow.setAcquisitionEra(era)
        self.acquisitionEra = era
        return

    def setNamingConventionParameters(self, era, procString, procVers):
        """
        _setNamingConventionParameters_

        Sets AcquisitionEra, ProcessingString and ProcessingVersion

        """
        self.workflow.setAcquisitionEra(era)
        self.workflow.parameters['ProcessingString'] = procString
        self.workflow.parameters['ProcessingVersion'] = procVers

        self.acquisitionEra = era
        self.processingString = procString
        self.processingVersion = procVers

        self.useProperNamingConventions = True
        return

    def setActivity(self, activity):
        """
        _changeWorkflowType_

        Set the workflow type, i.e. Simulation, Reconstruction,
        Reprocessing or Skimming

        """
        self.workflow.setActivity(activity)
        return

    def setCMSSWVersion(self, version):
        """
        _setCMSSWVersion_

        Set the version of CMSSW to be used

        """
        self.cmsswVersions.append(version)
        self.cmsRunNode.application['Version'] = version
        self.cmsRunNode.application['Executable'] = "cmsRun"
        self.cmsRunNode.application['Project'] = "CMSSW"
        self.cmsRunNode.application['Architecture'] = ""
        return

    def setUserSandbox(self, sandboxloc):
        """
        _setUserSandbox_

        Sets the location of the user sandbox

        """
        self.cmsRunNode.userSandbox = sandboxloc
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Physics Group owning the workflow

        """
        self.group = group
        self.workflow.parameters['PhysicsGroup'] = self.group
        return

    def setConfiguration(self, cfgFile, **args):
        """
        _setConfiguration_

        Provide the CMSSW configuration to be used.
        By default, assume that cfgFile is a python format string.

        The format & type can be specified using args:

        - Type : must be "file" or "string" or "instance"

        """
        cfgType = args.get("Type", "instance")

        if cfgType not in ("file", "string", "instance"):
            msg = "Illegal Type for cfg file: %s\n" % cfgType
            msg += "Should be \"file\" or \"string\"\n"
            raise RuntimeError, msg

        cfgContent = cfgFile
        if cfgType == "file":
            cfgContent = file(cfgFile).read()
            cfgType = "string"

        if cfgType == "string":
            cfgData = cfgContent
            cfgContent = CMSSWConfig()
            cfgContent.unpack(cfgData)

        self.cmsRunNode.cfgInterface = cfgContent
        self.configurations.append(cfgContent)
        return

    def setOriginalCfg(self, honkingGreatString):
        """
        _setOriginalCfg_

        Set the original cfg file content that is to be recorded in DBS

        CALL THIS METHOD AFTER setConfiguration

        """
        sep = '\n\n### Next chained config file ###\n\n'
        cfg = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_config = self.origCfgs.get(link['InputNode'], '')
                if prev_config:
                    cfg = '%s%s%s' % (cfg, prev_config, sep)
        cfg = '%s%s' % (cfg, honkingGreatString)
        self.cmsRunNode.cfgInterface.originalCfg = cfg
        self.origCfgs[self.cmsRunNode.name] = cfg
        return

    def setPSetHash(self, hashValue):
        """
        _setPSetHash_

        Set the value for the PSetHash
        If any InputLinks are present their pset hashes are prepended

        """
        hash = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_node_hash = self.psetHashes.get(link['InputNode'], None)
                if prev_node_hash:  # cmsGen nodes will be missing
                    hash = '%s%s_' % (hash, prev_node_hash)
        hash = '%s%s' % (hash, hashValue)
        self.psetHashes[self.cmsRunNode.name] = hash
        return

    def addInputDataset(self, datasetPath):
        """
        _addInputDataset_

        If this workflow processes a dataset, set that here

        NOTE: It is possible to also specify

        - Split Type (file or event)
        - Split Size (int)
        - input DBS

        Not sure how many of these we want to use.
        For now, they can be added to the inputDataset dictionary

        """
        datasetBits = DatasetConventions.parseDatasetPath(datasetPath)
        self.inputDataset.update(datasetBits)
        self.inputDataset['IsUsed'] = True
        self.inputDataset['DatasetName'] = datasetPath
        return

    def addPileupDataset(self, datasetName, filesPerJob=10,
                         targetModule=None):
        """
        _addPileupDataset_

        Add a dataset to provide pileup overlap.
        filesPerJob should be 1 in 99.9 % of cases

        """
        pileupDataset = {}
        pileupDataset['Primary'] = None
        pileupDataset['Processed'] = None
        pileupDataset['DataTier'] = None
        datasetBits = DatasetConventions.parseDatasetPath(datasetName)
        pileupDataset.update(datasetBits)
        pileupDataset['FilesPerJob'] = filesPerJob
        # Target module could be 'MixingModule' or 'DataMixingModule' for
        # the moment. If None, MixingModule will be used.
        pileupDataset['TargetModule'] = targetModule
        self.pileupDatasets.append(pileupDataset)
        return

    def addFinalDestination(self, *phedexNodeNames):
        """
        _addFinalDestination_

        Add a final destination that can be used to generate
        a PhEDEx subscription so that the data gets transferred to
        some final location.

        NOTE: Do we want to support a list of PhEDEx nodes? Eg CERN + FNAL

        """
        nameList = ""
        for nodeName in phedexNodeNames:
            nameList += "%s," % nodeName
        nameList = nameList[:-1]
        self.workflow.parameters['PhEDExDestination'] = nameList
        return

    def addSelectionEfficiency(self, selectionEff):
        """
        _addSelectionEfficiency_

        Do we have a selection efficiency?

        """
        self.cmsRunNode.applicationControls["SelectionEfficiency"] = \
            selectionEff
        return

    def setOutputDatasetDbsStatus(self, status):
        """
        _setOutputDatasetDbsStatus_

        The output datasets will have this status in the field
        dataset.status.  This value will be used when registering the
        output dataset in DBS.  Only two values are accepted:

        - VALID
        - PRODUCTION

        """
        if status in ('VALID', 'PRODUCTION'):
            self.outputDatasetStatus = status
        return

    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when done

        """
        self._Validate()

        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1",
                                      *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'],
                self.inputDataset['Processed'])
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                'SplitType',
                'SplitSize',
                'OnlySites',
                'OnlyBlocks',
                'OnlyClosedBlocks',
                ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = \
                        self.inputDataset[keyname]

        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'],
                pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']

        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                       cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                        self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down
                        # the dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Wont work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                                    inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use its parent.
                        else:
                            # TODO: Wont work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue

            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                    PhysicsChannel=self.channel,
                    )

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join(
                            (self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        ProcessingString=processingString,
                        ProcessingVersion=self.processingVersion,
                        Unmerged=True)
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version=cmsRunNode.application['Version'],
                        Label=self.label,
                        Group=self.group,
                        FilterName=filterName,
                        RequestId=self.requestId,
                        Unmerged=True)
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        Conditions=self.workflow.parameters['Conditions'],
                        ProcessingVersion=self.workflow.parameters['ProcessingVersion'],
                        FilterName=filterName,
                        Unmerged=True)

                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName

                outDS = cmsRunNode.addOutputDataset(primaryName,
                                                    processedName,
                                                    outModName)

                outDS['Status'] = self.outputDatasetStatus
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                    cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group

                # check for input dataset for first node
                if self.inputDataset['IsUsed'] and \
                       cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Wont work if more than one InputLink exists
                            outDS['ParentDataset'] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                                    inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                        (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                    "/%s/%s/%s" % (outDS['PrimaryDataset'],
                                   outDS['ProcessedDataset'],
                                   outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (i.e HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow

    def _Validate(self):
        """
        _Validate_

        Private method to test all options are set.

        Throws a WorkflowMakerError if any problems found

        """
        notNoneAttrs = [
            "requestId",
            "label",
            "group",
            "channel",
            ]
        for attrName in notNoneAttrs:
            value = getattr(self, attrName, None)
            if value == None:
                msg = "Attribute Not Set: %s" % attrName
                raise WorkflowMakerError(msg)

        if not len(self.configurations):
            msg = "Attribute Not Set: configurations"
            raise WorkflowMakerError(msg)

        if len(self.configurations) != len(self.cmsswVersions):
            msg = "len(self.configurations) != len(self.cmsswVersions)"
            raise WorkflowMakerError(msg)

        return
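# A minimal end-to-end sketch for WorkflowMaker (the request id, channel,
# label and cfgPythonString values are illustrative assumptions): set the
# attributes that _Validate() insists on, then build and persist the spec.
maker = WorkflowMaker("100001", "MinBias", "Test")
maker.setPhysicsGroup("Individual")
maker.setCMSSWVersion("CMSSW_2_0_8")
maker.setConfiguration(cfgPythonString, Type="string")  # assumed pre-expanded cfg
maker.setPSetHash("NO_PSET_HASH")                       # placeholder hash
spec = maker.makeWorkflow()
spec.save("%s-Workflow.xml" % maker.workflowName)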
class RepackerSetup:
    """
    _RepackerSetup_

    Object to manipulate the Configuration files for a repacker job

    - Extract the details of the repacker job entity stored in the config
    - Pull in the lumi server information and add it to the config

    """
    def __init__(self, workflowSpec, jobSpec):
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpec)
        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpec)

        taskState = TaskState(os.getcwd())
        taskState.loadRunResDB()

        jobSpecFinder = NodeFinder(taskState.taskName())
        self.jobSpec.payload.operate(jobSpecFinder)
        self.jobSpecNode = jobSpecFinder.result

        workflowFinder = NodeFinder(taskState.taskName())
        self.workflowSpec.payload.operate(workflowFinder)
        self.workflowNode = workflowFinder.result

        self.run = None
        self.lumis = []
        self.streamerFiles = []
        self.activeDatasets = []

    def unpackJobEntity(self):
        """
        _unpackJobEntity_

        Get the StreamerJobEntity from the JobSpec node

        """
        repackJobEntity = self.jobSpecNode.cfgInterface.extensions.get('Streamer', None)
        if repackJobEntity == None:
            msg = "No StreamerJobEntity in JobSpec configuration\n"
            msg += "This is required for repacker jobs\n"
            raise RuntimeError, msg

        # Get run and lumi numbers for this job
        self.run = repackJobEntity.data['runNumber']
        self.lumis = repackJobEntity.data['lumiSections']
        print "Repacker Job Handling Run:%s\n LumiSections: %s\n" % (self.run, self.lumis)

        # Sort streamer input by lumi ID for time ordering
        self.streamerFiles = sortByValue(repackJobEntity.data['streamerFiles'])
        msg = "Streamer Files for this job are:\n"
        for strmr in self.streamerFiles:
            msg += "  %s\n" % strmr
        print msg

        # Get list of active datasets for this job
        ## self.activeDatasets = repackJobEntity.data['activeOutputModules']
        ## msg = "This Job Will repack datasets:\n"
        ## for dataset in self.activeDatasets:
        ##     msg += "  %s\n" % dataset
        ## print msg

        return

    def backupPSet(self, filename, process):
        """
        _backupPSet_

        Write a backup copy of the current PSet to disk.

        """
        print "Writing current configuration as %s" % filename
        handle = open(filename, 'w')
        handle.write("import pickle\n")
        handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(process))
        handle.write("process = pickle.loads(pickledCfg)\n")
        handle.close()
        return

    def importAndBackupProcess(self):
        """
        _importAndBackupProcess_

        Try to import the process object for the job, which is contained
        in PSet.py, and save a backup copy of it.

        """
        try:
            from PSet import process
        except ImportError, ex:
            msg = "Failed to import PSet module containing cmsRun Config\n"
            msg += str(ex)
            raise RuntimeError, msg

        print "PSet.py imported"
        self.backupPSet("PSetPreRepack.log", process)

        return process
class PromptRecoWorkflow(FactoryInterface):
    """
    _PromptRecoWorkflow_

    Factory to build workflows for PromptReco jobs.

    """
    def __init__(self, runNumber, version, cmsPath, scramArch):
        FactoryInterface.__init__(self, version, cmsPath, scramArch)
        self.run = runNumber
        self.workflow = None
        self.timestamp = None
        self.cmsRunNode = None
        self.workflowName = None
        self.configFile = None
        self.useLazyDownload = False
        self.primaryDataset = None
        self.processedDataset = None
        self.parentProcessedDataset = None
        self.acquisitionEra = None
        self.processingVersion = None

    def setConfigFile(self, configFile):
        """
        _setConfigFile_

        Set the config file that will be loaded into the workflow.

        """
        self.configFile = configFile

    def setPrimaryDataset(self, primaryDatasetName):
        """
        _setPrimaryDataset_

        Set the primary dataset that this workflow will run over.

        """
        self.primaryDataset = primaryDatasetName
        return

    def setProcessedDataset(self, processedDatasetName):
        """
        _setProcessedDataset_

        Set the processed dataset that this workflow will produce.

        """
        self.processedDataset = processedDatasetName
        return

    def setParentProcessedDataset(self, parentProcessedDatasetName):
        """
        _setParentProcessedDataset_

        Set the parent processed dataset for this workflow.

        """
        self.parentProcessedDataset = parentProcessedDatasetName
        return

    def setAcquisitionEra(self, acquisitionEra):
        """
        _setAcquisitionEra_

        Set the acquisition era.

        """
        self.acquisitionEra = acquisitionEra
        return

    def setProcessingVersion(self, processingVersion):
        """
        _setProcessingVersion_

        Set the processing version.

        """
        self.processingVersion = processingVersion
        return

    def setLazyDownload(self, useLazyDownload):
        """
        _setLazyDownload_

        This enables/disables lazy download mode in the framework.

        """
        self.useLazyDownload = useLazyDownload

    def setupOutputModule(self, outputModuleName, dataTier):
        """
        _setupOutputModule_

        Create the outputModule and outputDataset sections of the workflow.

        """
        outputDataset = self.cmsRunNode.addOutputDataset(self.primaryDataset,
                                                         self.processedDataset,
                                                         outputModuleName)
        outputDataset["NoMerge"] = "True"
        outputDataset["DataTier"] = dataTier
        outputDataset["ApplicationName"] = "cmsRun"
        outputDataset["ApplicationProject"] = "CMSSW"
        outputDataset["ApplicationVersion"] = self.cmssw["CMSSWVersion"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["ParentDataset"] = "/%s/%s/%s" % (self.primaryDataset,
                                                        self.parentProcessedDataset,
                                                        "RAW")

        cfgWrapper = self.workflow.payload.cfgInterface
        outputModule = cfgWrapper.getOutputModule(outputModuleName)
        outputModule["catalog"] = '%s-Catalog.xml' % outputModule['Name']
        outputModule["primaryDataset"] = self.primaryDataset
        outputModule["processedDataset"] = self.processedDataset
        outputModule["dataTier"] = dataTier
        outputModule["acquisitionEra"] = self.acquisitionEra
        outputModule["processingVersion"] = self.processingVersion

        outputDataset["LFNBase"] = getLFN(outputModule, self.run,
                                          Unmerged=True)
        outputDataset["MergedLFNBase"] = getLFN(outputModule, self.run)
        outputModule["LFNBase"] = outputDataset["LFNBase"]
        outputModule["MergedLFNBase"] = outputDataset["MergedLFNBase"]

        outputModule["fileName"] = "%s.root" % outputModule['Name']
        outputModule["logicalFileName"] = os.path.join(
            outputDataset["LFNBase"], "PromptReco.%s.root" % dataTier)
        return

    def makeWorkflow(self):
        """
        _makeWorkflow_

        Generate a workflow.  If the self.configFile parameter has been set
        this will attempt to load the config from file, otherwise it will
        create an empty process object which will get filled in by the
        runtime script.

        """
        self.timestamp = int(time.time())
        self.workflow = WorkflowSpec()
        self.workflowName = "PromptReco-Run%s-%s" % (self.run,
                                                     self.primaryDataset)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters["WorkflowType"] = "Processing"
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"]
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"]
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"
        self.cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        self.cmsRunNode.application["Executable"] = "cmsRun"
        self.cmsRunNode.application["Project"] = "CMSSW"
        self.cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        inputDataset = self.cmsRunNode.addInputDataset(self.primaryDataset,
                                                       self.parentProcessedDataset)
        inputDataset["DataTier"] = "RAW"

        if self.configFile == None:
            self.loadProcessFromFramework()
        else:
            self.loadProcessFromFile()

        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow

    def loadProcessFromFile(self):
        """
        _loadProcessFromFile_

        Load the config file into the workflow.

        """
        preExecScript = self.cmsRunNode.scriptControls["PreExe"]
        preExecScript.append("T0.PromptRecoInjector.RuntimePromptReco")

        cfgBaseName = os.path.basename(self.configFile).replace(".py", "")
        cfgDirName = os.path.dirname(self.configFile)
        modPath = imp.find_module(cfgBaseName, [cfgDirName])

        loader = CMSSWAPILoader(self.cmssw["ScramArch"],
                                self.cmssw["CMSSWVersion"],
                                self.cmssw["CMSPath"])
        try:
            loader.load()
        except Exception, ex:
            logging.error("Couldn't load CMSSW libraries: %s" % ex)
            return None

        try:
            modRef = imp.load_module(cfgBaseName, modPath[0],
                                     modPath[1], modPath[2])
        except Exception, ex:
            logging.error("Can't load config: %s" % ex)
            loader.unload()
            return None
import os

# assumed import path for TaskState/getTaskState, matching their use below
from ProdCommon.FwkJobRep.TaskState import TaskState, getTaskState
from ProdCommon.FwkJobRep.MergeReports import updateReport
import StageOut.Utilities as StageOutUtils
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

if __name__ == '__main__':
    msg = "******RuntimeStageOutFailure Invoked*****"
    print msg

    state = TaskState(os.getcwd())
    state.loadRunResDB()
    config = state.configurationDict()

    #  //
    # // find inputs by locating the task for which we are staging out
    #//  and loading its TaskState
    workflow = WorkflowSpec()
    workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])
    stageOutFor, override, controls = StageOutUtils.getStageOutConfig(
        workflow, state.taskName())

    inputTasks = stageOutFor
    for inputTask in inputTasks:
        inputState = getTaskState(inputTask)
        if inputState == None:
            msg = "Input State: %s Not found, skipping..." % inputTask
            print msg
            continue
        inputReport = inputState.getJobReport()
        inputReport.status = "Failed"
def __call__(self, collectPayload):
    """
    _operator(collectPayload)_

    Given the dataset and run in the payload, callout to DBS
    to find the files to be harvested

    """
    msg = "DBSPlugin invoked for %s" % str(collectPayload)
    logging.info(msg)
    site = self.args.get("Site", "srm.cern.ch")

    baseCache = os.path.join(self.args['ComponentDir'], "DBSPlugin")
    if not os.path.exists(baseCache):
        os.makedirs(baseCache)

    datasetCache = os.path.join(baseCache,
                                collectPayload['PrimaryDataset'],
                                collectPayload['ProcessedDataset'],
                                collectPayload['DataTier'])

    if not os.path.exists(datasetCache):
        os.makedirs(datasetCache)

    workflowFile = os.path.join(
        datasetCache,
        "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
        )

    if not os.path.exists(workflowFile):
        msg = "No workflow found for dataset: %s\n " % (
            collectPayload.datasetPath(),)
        msg += "Looking up software version and generating workflow..."

        if self.args.get("OverrideGlobalTag", None) == None:
            globalTag = findGlobalTagForDataset(
                self.dbsUrl,
                collectPayload['PrimaryDataset'],
                collectPayload['ProcessedDataset'],
                collectPayload['DataTier'],
                collectPayload['RunNumber'])
        else:
            globalTag = self.args['OverrideGlobalTag']

        if self.args.get("OverrideCMSSW", None) != None:
            cmsswVersion = self.args['OverrideCMSSW']
            msg = "Using Override for CMSSW Version %s" % (
                self.args['OverrideCMSSW'],)
            logging.info(msg)
        else:
            cmsswVersion = findVersionForDataset(
                self.dbsUrl,
                collectPayload['PrimaryDataset'],
                collectPayload['ProcessedDataset'],
                collectPayload['DataTier'],
                collectPayload['RunNumber'])
            msg = "Found CMSSW Version for dataset/run\n"
            msg += " Dataset %s Run %s\n" % (collectPayload.datasetPath(),
                                             collectPayload['RunNumber'])
            msg += " CMSSW Version = %s\n " % cmsswVersion
            logging.info(msg)

        workflowSpec = createHarvestingWorkflow(
            collectPayload.datasetPath(),
            site,
            self.args['CmsPath'],
            self.args['ScramArch'],
            cmsswVersion,
            globalTag,
            configFile=self.args['ConfigFile'],
            DQMServer=self.args['DQMServer'],
            proxyLocation=self.args['proxyLocation'],
            DQMCopyToCERN=self.args['DQMCopyToCERN'],
            doStageOut=self.args['DoStageOut'])

        workflowSpec.save(workflowFile)
        msg = "Created Harvesting Workflow:\n %s" % workflowFile
        logging.info(msg)
        self.publishWorkflow(workflowFile, workflowSpec.workflowName())
    else:
        msg = "Loading existing workflow for dataset: %s\n " % (
            collectPayload.datasetPath(),)
        msg += " => %s\n" % workflowFile
        logging.info(msg)
        workflowSpec = WorkflowSpec()
        workflowSpec.load(workflowFile)

    job = {}
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s-%s" % (
        workflowSpec.workflowName(),
        collectPayload['RunNumber'],
        time.strftime("%H-%M-%S-%d-%m-%y"))

    jobSpec.setJobName(jobName)
    jobSpec.setJobType("Harvesting")
    jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']
    jobSpec.addWhitelistSite(site)
    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
    jobSpec.payload.cfgInterface.inputFiles.extend(
        listFilesInRun(DBSReader(self.dbsUrl),
                       collectPayload['PrimaryDataset'],
                       collectPayload['ProcessedDataset'],
                       collectPayload['DataTier'],
                       collectPayload['RunNumber']))

    specCacheDir = os.path.join(
        datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
    if not os.path.exists(specCacheDir):
        os.makedirs(specCacheDir)
    jobSpecFile = os.path.join(specCacheDir,
                               "%s-JobSpec.xml" % jobName)
    jobSpec.save(jobSpecFile)

    job["JobSpecId"] = jobName
    job["JobSpecFile"] = jobSpecFile
    job['JobType'] = "Harvesting"
    # a stray trailing comma here used to make this a one-element tuple
    job["WorkflowSpecId"] = workflowSpec.workflowName()
    job["WorkflowPriority"] = 10
    job["Sites"] = [site]
    job["Run"] = collectPayload['RunNumber']
    job['WorkflowSpecFile'] = workflowFile

    msg = "Harvesting Job Created for\n"
    msg += " => Run: %s\n" % collectPayload['RunNumber']
    msg += " => Primary: %s\n" % collectPayload['PrimaryDataset']
    msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
    msg += " => Tier: %s\n" % collectPayload['DataTier']
    msg += " => Workflow: %s\n" % job['WorkflowSpecId']
    msg += " => Job: %s\n" % job['JobSpecId']
    msg += " => Site: %s\n" % job['Sites']
    logging.info(msg)

    return [job]
class RequestIterator:
    """
    _RequestIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.count = 0
        self.runIncrement = 1
        self.currentJob = None
        self.sitePref = None
        self.pileupDatasets = {}
        self.ownedJobSpecs = {}

        #  //
        # // Initially hard coded, should be extracted from Component Config
        #//
        self.eventsPerJob = 10

        self.workflowSpec = WorkflowSpec()
        try:
            self.workflowSpec.load(workflowSpecFile)
        except:
            logging.error("ERROR Loading Workflow: %s " % (workflowSpecFile))
            return

        if self.workflowSpec.parameters.get("RunIncrement", None) != None:
            self.runIncrement = int(
                self.workflowSpec.parameters['RunIncrement'])

        self.generators = GeneratorMaker()
        self.workflowSpec.payload.operate(self.generators)

        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" % self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)

    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list

        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName()))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName()))
            sites = getPileupSites(self.workflowSpec)
        return sites

    def __call__(self):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.

        """
        newJobSpec = self.createJobSpec()
        self.count += self.runIncrement
        return newJobSpec

    def createJobSpec(self):
        """
        _createJobSpec_

        Load the WorkflowSpec object and generate a JobSpec from it

        """
        jobSpec = self.workflowSpec.createJobSpec()
        jobName = "%s-%s" % (
            self.workflowSpec.workflowName(),
            self.count)

        self.currentJob = jobName
        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Processing")
        jobSpec.parameters['RunNumber'] = self.count

        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.operate(self.generateJobConfig)
        jobSpec.payload.operate(self.generateCmsGenConfig)

        specCacheDir = os.path.join(
            self.specCache, str(self.count // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)
        self.ownedJobSpecs[jobName] = jobSpecFile

        #  //
        # // Add site pref if set
        #//
        if self.sitePref != None:
            # AF: Allow site pref to be a comma separated list of sites,
            #     each one added in the Whitelist:
            # jobSpec.addWhitelistSite(self.sitePref)
            for siteWhite in self.sitePref.split(","):
                jobSpec.addWhitelistSite(siteWhite)

        jobSpec.save(jobSpecFile)

        return jobSpecFile

    def generateJobConfig(self, jobSpecNode):
        """
        _generateJobConfig_

        Operator to act on a JobSpecNode tree to convert the template
        config file into a JobSpecific Config File

        """
        if jobSpecNode.name not in self.generators.keys():
            return
        generator = self.generators[jobSpecNode.name]

        useOutputMaxEv = False
        if jobSpecNode.cfgInterface != None:
            outMaxEv = jobSpecNode.cfgInterface.maxEvents['output']
            if outMaxEv != None:
                useOutputMaxEv = True

        if useOutputMaxEv:
            jobCfg = generator(self.currentJob,
                               maxEventsWritten=self.eventsPerJob,
                               firstRun=self.count)
        else:
            jobCfg = generator(self.currentJob,
                               maxEvents=self.eventsPerJob,
                               firstRun=self.count)

        #  //
        # // Is there pileup for this node?
        #//
        if self.pileupDatasets.has_key(jobSpecNode.name):
            puDataset = self.pileupDatasets[jobSpecNode.name]
            logging.debug("Node: %s has a pileup dataset: %s" % (
                jobSpecNode.name, puDataset.dataset))
            fileList = puDataset.getPileupFiles()
            jobCfg.pileupFiles = fileList

        jobSpecNode.cfgInterface = jobCfg
        return

    def generateCmsGenConfig(self, jobSpecNode):
        """
        _generateCmsGenConfig_

        Process CmsGen type nodes to insert maxEvents and run numbers
        for cmsGen jobs

        """
        if jobSpecNode.type != "CmsGen":
            return

        jobSpecNode.applicationControls['firstRun'] = self.count
        jobSpecNode.applicationControls['maxEvents'] = self.eventsPerJob
        jobSpecNode.applicationControls['randomSeed'] = randomSeed()
        jobSpecNode.applicationControls['fileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        jobSpecNode.applicationControls['logicalFileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        return

    def removeSpec(self, jobSpecId):
        """
        _removeSpec_

        Remove a Spec file when it has been successfully injected

        """
        if jobSpecId not in self.ownedJobSpecs.keys():
            return

        logging.info("Removing JobSpec For: %s" % jobSpecId)
        filename = self.ownedJobSpecs[jobSpecId]
        if os.path.exists(filename):
            os.remove(filename)
        del self.ownedJobSpecs[jobSpecId]
        return

    def save(self, directory):
        """
        _save_

        Persist this objects state into an XML file and save
        it in the directory provided

        """
        doc = IMProvDoc("RequestIterator")
        node = IMProvNode(self.workflowSpec.workflowName())
        doc.addNode(node)
        node.addNode(IMProvNode("Run", None, Value=str(self.count)))
        node.addNode(
            IMProvNode("EventsPerJob", None, Value=str(self.eventsPerJob)))
        node.addNode(IMProvNode("SitePref", None, Value=str(self.sitePref)))
        pu = IMProvNode("Pileup")
        node.addNode(pu)
        for key, value in self.pileupDatasets.items():
            puNode = value.save()
            puNode.attrs['PayloadNode'] = key
            pu.addNode(puNode)

        specs = IMProvNode("JobSpecs")
        node.addNode(specs)
        for key, val in self.ownedJobSpecs.items():
            specs.addNode(IMProvNode("JobSpec", val, ID=key))

        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName())
        handle = open(fname, 'w')
        handle.write(doc.makeDOMDocument().toprettyxml())
        handle.close()
        return

    def load(self, directory):
        """
        _load_

        Load this instance given the workflow and directory containing
        the persistency file

        """
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName())

        try:
            node = loadIMProvFile(fname)
        except Exception, ex:
            msg = "ERROR: Corrupted Persistency File:\n"
            msg += " => %s\n" % fname
            msg += "Cannot be read:\n => %s\n" % str(ex)
            logging.error(msg)
            return

        qbase = "/RequestIterator/%s" % self.workflowSpec.workflowName()
        runQ = IMProvQuery("%s/Run[attribute(\"Value\")]" % qbase)
        eventQ = IMProvQuery("%s/EventsPerJob[attribute(\"Value\")]" % qbase)
        siteQ = IMProvQuery("%s/SitePref[attribute(\"Value\")]" % qbase)

        runVal = int(runQ(node)[-1])
        eventVal = int(eventQ(node)[-1])
        siteVal = str(siteQ(node)[-1])
        if siteVal.lower() == "none":
            siteVal = None

        self.count = runVal
        self.eventsPerJob = eventVal
        self.sitePref = siteVal

        puQ = IMProvQuery("%s/Pileup/*" % qbase)
        puNodes = puQ(node)
        for puNode in puNodes:
            payloadNode = str(puNode.attrs.get("PayloadNode"))
            puDataset = PileupDataset("dummy", 1)
            puDataset.load(puNode)
            self.pileupDatasets[payloadNode] = puDataset

        specQ = IMProvQuery("%s/JobSpecs/*" % qbase)
        specNodes = specQ(node)
        for specNode in specNodes:
            specId = str(specNode.attrs['ID'])
            specFile = str(specNode.chardata).strip()
            self.ownedJobSpecs[specId] = specFile

        return
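# Usage sketch for RequestIterator (paths are illustrative): each call emits
# one concrete JobSpec file into the workflow's cache area, advancing the run
# number by runIncrement, and the iterator state can be checkpointed.
iterator = RequestIterator("Test-Workflow.xml", "/tmp/prodagent-work")
firstSpec = iterator()    # JobSpec file for the first run number
secondSpec = iterator()   # run number advanced by runIncrement
iterator.save("/tmp/prodagent-work")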
def createMergeJobWorkflow(procSpec, isFastMerge = True, doCleanUp = True,
                           littleE = False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job
    workflows that can be used to generate actual merge jobs
    (as opposed to creating datasets like createMergeDatasetWorkflow)

    returns a dictionary of (input, IE MergeSensor watched) dataset name
    to workflow spec instances

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        # //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox != None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        # //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        # //
        # // Output Dataset
        #//
        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'],
            dataset['ProcessedDataset'],
            outputModuleName)

        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']

        outputDataset["ApplicationName"] = \
            cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = \
            cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = \
            cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
            procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset

        # //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp == True:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        # //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        results[inputDataset] = newWF

    return results
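# --- Usage sketch (not from the original source) ---
# A caller would typically build the merge workflows from a saved processing
# spec and persist one XML file per watched input dataset. The paths here
# are hypothetical; os and WorkflowSpec are assumed imported as above, and
# WorkflowSpec.save(filename) is assumed to exist (JobSpec.save is used the
# same way elsewhere in this code):

def writeMergeWorkflows(procSpecFile, outputDir):
    """
    Hypothetical helper: load a processing workflow, derive its merge
    workflows, and save each one into outputDir.
    """
    procSpec = WorkflowSpec()
    procSpec.load(procSpecFile)

    mergeSpecs = createMergeJobWorkflow(procSpec, isFastMerge = False)
    for datasetName, mergeSpec in mergeSpecs.items():
        # flatten the dataset path (/Primary/Tier/Processed) into a filename
        fname = "%s-Merge-Workflow.xml" % datasetName.replace("/", "_")
        mergeSpec.save(os.path.join(outputDir, fname))
    return mergeSpecs.keys()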
class DatasetIterator:
    """
    _DatasetIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.currentJob = None
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpecFile)
        self.currentJobDef = None
        self.count = 0
        self.onlyClosedBlocks = False
        if self.workflowSpec.parameters.has_key("OnlyClosedBlocks"):
            onlyClosed = str(
                self.workflowSpec.parameters["OnlyClosedBlocks"]).lower()
            if onlyClosed == "true":
                self.onlyClosedBlocks = True
        self.ownedJobSpecs = {}
        self.allowedBlocks = []
        self.allowedSites = []
        self.dbsUrl = getLocalDBSURL()
        self.splitType = \
            self.workflowSpec.parameters.get("SplitType", "file").lower()
        self.splitSize = int(self.workflowSpec.parameters.get("SplitSize", 1))
        self.generators = GeneratorMaker()
        self.generators(self.workflowSpec.payload)
        self.pileupDatasets = {}

        # //
        # // Does the workflow contain a block restriction??
        #//
        blockRestriction = \
            self.workflowSpec.parameters.get("OnlyBlocks", None)
        if blockRestriction != None:
            # //
            # // restriction on blocks present, populate allowedBlocks list
            #//
            msg = "Block restriction provided in Workflow Spec:\n"
            msg += "%s\n" % blockRestriction
            logging.debug(msg)
            blockList = blockRestriction.split(",")
            for block in blockList:
                if len(block.strip()) > 0:
                    self.allowedBlocks.append(block.strip())

        # //
        # // Does the workflow contain a site restriction??
        #//
        siteRestriction = \
            self.workflowSpec.parameters.get("OnlySites", None)
        if siteRestriction != None:
            # //
            # // restriction on sites present, populate allowedSites list
            #//
            msg = "Site restriction provided in Workflow Spec:\n"
            msg += "%s\n" % siteRestriction
            logging.debug(msg)
            siteList = siteRestriction.split(",")
            for site in siteList:
                if len(site.strip()) > 0:
                    self.allowedSites.append(site.strip())

        # //
        # // Is the DBSURL contact information provided??
        #//
        value = self.workflowSpec.parameters.get("DBSURL", None)
        if value != None:
            self.dbsUrl = value

        if self.dbsUrl == None:
            msg = "Error: No DBSURL available for dataset:\n"
            msg += "Can't get local DBSURL and one not provided with workflow"
            raise RuntimeError, msg

        # //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" % self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)

    def __call__(self, jobDef):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.
        The JobDef should be a JobDefinition with the input details
        including LFNs and event ranges etc.

        """
        newJobSpec = self.createJobSpec(jobDef)
        self.count += 1
        return newJobSpec

    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list

        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites

    def inputDataset(self):
        """
        _inputDataset_

        Extract the input Dataset from this workflow

        """
        topNode = self.workflowSpec.payload
        try:
            inputDataset = topNode._InputDatasets[-1]
        except StandardError, ex:
            msg = "Error extracting input dataset from Workflow:\n"
            msg += str(ex)
            logging.error(msg)
            return None

        return inputDataset.name()
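# --- Usage sketch (not from the original source) ---
# A component driving DatasetIterator would construct it once per workflow,
# pull in pileup information, then call it once per job definition. The
# shape of the JobDefinition objects is assumed, not shown in this code:

def makeJobsForWorkflow(workflowSpecFile, workingDir, jobDefs):
    """
    Hypothetical driver: build a DatasetIterator and generate one
    concrete JobSpec per JobDefinition.
    """
    iterator = DatasetIterator(workflowSpecFile, workingDir)
    iterator.loadPileupDatasets()

    logging.info("Input dataset: %s" % iterator.inputDataset())
    specFiles = []
    for jobDef in jobDefs:
        # each call writes a JobSpec file into the spec cache and
        # advances the in-memory job counter
        specFiles.append(iterator(jobDef))
    return specFiles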
""" import sys, os from ProdAgentDB.Config import defaultConfig as dbConfig from ProdCommon.Database import Session from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec from ProdAgentCore.Configuration import loadProdAgentConfiguration from MergeSensor.MergeSensorDB import MergeSensorDB from JobQueue.JobQueueDB import JobQueueDB import ProdAgent.WorkflowEntities.Aux as WEAux import ProdAgent.WorkflowEntities.Workflow as WEWorkflow workflow = sys.argv[1] workflowSpec = WorkflowSpec() workflowSpec.load(workflow) # // # // Clean out job cache #// config = loadProdAgentConfiguration() compCfg = config.getConfig("JobCreator") creatorCache = os.path.expandvars(compCfg['ComponentDir']) workflowCache = os.path.join(creatorCache, workflowSpec.workflowName()) if os.path.exists(workflowCache): os.system("/bin/rm -rf %s" % workflowCache)
logFormatter = logging.Formatter("%(asctime)s:%(message)s")
logHandler.setFormatter(logFormatter)
logging.getLogger().addHandler(logHandler)
logging.getLogger().setLevel(logging.INFO)

Dataset.setLogging(logging)

database = MergeSensorDB()
Dataset.setDatabase(database)

workflowFile = sys.argv[1]

print "Updating DB for workflow: ", workflowFile

# read the WorkflowSpecFile
try:
    wfile = WorkflowSpec()
    wfile.load(workflowFile)

# wrong dataset file
except Exception, msg:
    print "Error loading workflow specifications from %s: %s" \
          % (workflowFile, msg)
    sys.exit(1)

# get output modules
try:
    outputDatasetsList = wfile.outputDatasets()
    outputModules = [outDS['OutputModuleName'] \
                     for outDS in outputDatasetsList]
    def __init__(self):
        self.state = TaskState(os.getcwd())
        self.state.loadRunResDB()
        self.state.loadJobSpecNode()

        # //
        # // check for store fail settings
        #//
        self.workflow = WorkflowSpec()
        self.workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])
        self.doStoreFail = self.workflow.parameters.get("UseStoreFail", False)
        if str(self.doStoreFail).lower() == "true":
            self.doStoreFail = True
        else:
            # anything other than "true" (including the string "False",
            # which is truthy as-is) disables store-fail
            self.doStoreFail = False

        self.config = self.state.configurationDict()
        self.inputTasks = self.config.get("InputTasks", [])

        # #TODO: not really sure this is correct, i would think i should
        # # take report from stageOut but that is missing if cmsRun fails
        # # what if cmsRun one is missing - do i want to generate an empty one?
        # self.inputReport = getTaskState(self.inputTasks[0]).getJobReport()

        # iterate over input tasks (in reverse order)
        # find first one with a fjr
        self.inputTask, self.inputReport = None, None
        for taskName in self.inputTasks[::-1]:
            task = getTaskState(taskName)
            report = task.getJobReport()
            if report is None:
                continue
            self.inputTask = task
            self.inputReport = report
            break

        # if got no valid fjr from previous tasks -
        # something must have gone wrong earlier - make our own
        # may need more things set here to make reports mergeable
        if self.inputReport is None:
            self.inputTask = self.state
            self.inputReport = FwkJobReport()

        self.regexps = self.config.get("LogMatchRegexp", [])

        self.doStageOut = True
        doingStageOut = self.config.get("DoStageOut", [])
        if len(doingStageOut) > 0:
            control = doingStageOut[-1]
            if control == "False":
                self.doStageOut = False

        self.workflowSpecId = self.config['WorkflowSpecID'][0]
        self.jobSpecId = self.state.jobSpecNode.jobName

        self.compRegexps = []
        for regexp in self.regexps:
            self.compRegexps.append(re.compile(regexp))

        # TODO: These should be pulled in from the workflow now,
        # not the config thing
        self.override = False
        soParams = self.config.get('StageOutParameters', {})
        self.override = soParams.has_key("Override")

        self.overrideParams = {}

        if self.override:
            overrideConf = self.config['StageOutParameters']['Override']
            self.overrideParams = {
                "command" : None,
                "option" : None,
                "se-name" : None,
                "lfn-prefix" : None,
                }
            try:
                self.overrideParams['command'] = overrideConf['command'][0]
                self.overrideParams['se-name'] = overrideConf['se-name'][0]
                self.overrideParams['lfn-prefix'] = \
                    overrideConf['lfn-prefix'][0]
            except StandardError, ex:
                msg = "Unable to extract Override parameters from config:\n"
                msg += str(self.config['StageOutParameters'])
                raise StageOutInitError(msg)
            if overrideConf.has_key('option'):
                if len(overrideConf['option']) > 0:
                    self.overrideParams['option'] = overrideConf['option'][-1]
                else:
                    self.overrideParams['option'] = ""
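# --- Illustrative configuration shape (assumed, not from the original) ---
# The Override extraction above indexes list-valued entries, so the
# RunResDB configuration dict would need to look roughly like this;
# the command and SE values below are hypothetical:

exampleStageOutConfig = {
    'StageOutParameters' : {
        'Override' : {
            'command'    : ["srmv2"],                     # stage out command
            'option'     : [""],                          # optional extra args
            'se-name'    : ["srm.example.org"],           # hypothetical SE
            'lfn-prefix' : ["srm://srm.example.org/lfn"], # hypothetical prefix
            },
        },
    }
# command, se-name and lfn-prefix are mandatory: a missing key raises
# StageOutInitError; 'option' falls back to "" when its list is empty.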
#!/usr/bin/env python

import os
import pickle

from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWAPILoader import CMSSWAPILoader
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.MCPayloads.DatasetTools import getOutputDatasetsWithPSet

specfile = "/uscms/home/gutsche/CSA08-JetET110-CSA08_S43_S43_rereco_may19_PIC_v1-Workflow.xml"

rawCfgFile = "%s.raw.cfg" % os.path.basename(specfile)
origCfgFile = "%s.orig.cfg" % os.path.basename(specfile)
dbsCfgFile = "%s.dbs.cfg" % os.path.basename(specfile)

spec = WorkflowSpec()
spec.load(specfile)

rawCfg = spec.payload.cfgInterface.rawCfg
originalCfg = spec.payload.cfgInterface.originalCfg

dbsDatasets = getOutputDatasetsWithPSet(spec.payload)

handle = open(dbsCfgFile, 'w')
handle.write(dbsDatasets[0]['PSetContent'])
handle.close()

handle = open(origCfgFile, 'w')
handle.write(originalCfg)
handle.close()

loader = CMSSWAPILoader(os.environ['SCRAM_ARCH'],