def createCleanupWorkflowSpec():
    """
    _createCleanupWorkflowSpec_

    Create a generic cleanup WorkflowSpec definition that can be used
    to generate a sandbox for cleanup jobs

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")

    workflow = WorkflowSpec()
    workflow.setWorkflowName("CleanUp-%s" % timestamp)
    workflow.setActivity("CleanUp")
    workflow.setRequestCategory("mc-cleanup")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "CleanUp"

    cleanUp = workflow.payload
    cleanUp.name = "cleanUp1"
    cleanUp.type = "CleanUp"
    cleanUp.application["Project"] = ""
    cleanUp.application["Version"] = ""
    cleanUp.application["Architecture"] = ""
    cleanUp.application["Executable"] = "RuntimeCleanUp.py"  # binary name
    cleanUp.configuration = ""
    cleanUp.cfgInterface = None

    return workflow
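
# Usage sketch (not from the original module; the output filename is
# hypothetical): the returned WorkflowSpec is typically persisted with its
# save() method and later used to build the cleanup job sandbox.
#
#   cleanupSpec = createCleanupWorkflowSpec()
#   cleanupSpec.save("CleanUp-Workflow.xml")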
def load(self, improvNode):
    """
    _load_

    Extract information for this object from the improv instance provided

    """
    wfQuery = IMProvQuery("/RequestSpec/WorkflowSpec")
    wfnode = wfQuery(improvNode)[0]
    wfspec = WorkflowSpec()
    wfspec.loadFromNode(wfnode)
    self.workflow = wfspec

    policyQuery = IMProvQuery("/RequestSpec/Policies/*")
    detailQuery = IMProvQuery("/RequestSpec/RequestDetails/*")
    preferredPAQuery = IMProvQuery("/RequestSpec/PreferredPA")

    policies = policyQuery(improvNode)
    details = detailQuery(improvNode)
    preferredPAs = preferredPAQuery(improvNode)

    for policy in policies:
        self.policies[str(policy.name)] = str(policy.chardata)

    # request details live in the child nodes of RequestDetails
    # matched by detailQuery above
    for detail in details:
        self.requestDetails[str(detail.name)] = str(detail.chardata)

    for preferredPA in preferredPAs:
        self.preferredPAs[str(preferredPA.attrs['id'])] = \
            str(preferredPA.attrs['priority'])
    return
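
# Usage sketch (assumptions: the owning class is instantiable as RequestSpec
# and the IMProv file loader pairs with IMProvQuery; the import path and
# file name are hypothetical):
#
#   from IMProv.IMProvLoader import loadIMProvFile
#
#   improvNode = loadIMProvFile("RequestSpec.xml")
#   request = RequestSpec()   # class owning this load() method
#   request.load(improvNode)
#   print request.policies, request.requestDetails, request.preferredPAs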
def createProductionWorkflow(prodName, cmsswVersion, cfgFile=None,
                             category="mc", **args):
    """
    _createProductionWorkflow_

    Create a Production style workflow, ie generation of new events

    """
    timestamp = int(time.time())

    if args.get("PyCfg", None) == None:
        if cfgFile == None:
            msg = "Error: No Cfg File or python cfg file provided to createProductionWorkflow"
            raise RuntimeError, msg
        pycfgFile = createPythonConfig(cfgFile)
        pycfgFileContent = file(pycfgFile).read()
    else:
        pycfgFileContent = args['PyCfg']

    if args.get("PSetHash", None) == None:
        realPSetHash = createPSetHash(cfgFile)
    else:
        realPSetHash = args['PSetHash']

    #  //
    # // Create a new WorkflowSpec and set its name
    #//
    spec = WorkflowSpec()
    workflowname = "%s__%s-%s-%s-%s" % (
        prodName, cmsswVersion,
        args.get("processingLabel", "Test07"),
        args.get("physicsGroup", "NoPhysicsGroup"),
        timestamp)
    spec.setWorkflowName(workflowname)
    spec.setRequestCategory(category)
    spec.setRequestTimestamp(timestamp)

    cmsRun = spec.payload
    populateCMSRunNode(
        cmsRun, "cmsRun1", cmsswVersion, pycfgFileContent, realPSetHash,
        timestamp, prodName,
        physicsGroup=args.get("physicsGroup", "NoPhysicsGroup"),
        processingLabel=args.get("processingLabel", "Test07"),
        fakeHash=args.get("FakeHash", False))

    addStageOutNode(cmsRun, "stageOut1")
    generateFilenames(spec)

    return spec
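
# Usage sketch (illustrative values only; the cfg file path is hypothetical):
#
#   spec = createProductionWorkflow("MinBias", "CMSSW_1_6_7",
#                                   cfgFile="minbias.cfg",
#                                   processingLabel="Test07",
#                                   physicsGroup="NoPhysicsGroup")
#   spec.save("%s-Workflow.xml" % spec.workflowName())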
def createJobSpec(jobSpecId, workflowSpecFile, filename, runNumber,
                  eventCount, firstEvent=None, saveString=False,
                  loadString=True):
    """
    _createJobSpec_

    Create a Processing JobSpec from the WorkflowSpec provided and either
    save it to filename or return it as a string

    """
    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)
    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
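
# Usage sketch (hypothetical IDs and paths): create a job spec for run 10
# with 1000 events, reading the workflow from disk rather than from an
# XML string.
#
#   createJobSpec("ProdJob-Run10", "Production-Workflow.xml",
#                 "ProdJob-Run10-JobSpec.xml", 10, 1000,
#                 firstEvent=1, loadString=False)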
def createLogCollectorWorkflowSpec(wf):
    """
    _createLogCollectorWorkflowSpec_

    Create a generic LogArchive WorkflowSpec definition

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")

    workflow = WorkflowSpec()
    workflow.setWorkflowName("LogCollect-%s" % timestamp)
    workflow.setActivity("LogCollect")
    workflow.setRequestCategory("logcollect")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "LogCollect"

    logArchive = workflow.payload
    logArchive.name = "logCollect1"
    logArchive.type = "LogCollect"
    #TODO: remove this?
    #logArchive.workflow = wf
    logArchive.application["Project"] = ""
    logArchive.application["Version"] = ""
    logArchive.application["Architecture"] = ""
    logArchive.application["Executable"] = "RuntimeLogCollector.py"  # binary name
    logArchive.configuration = ""
    logArchive.cfgInterface = None

    #set stageOut override
    #cfg = IMProvNode("config")
    #stageOut = IMProvNode("StageOutParameters")
    #cfg.addNode()
    #WorkflowTools.addStageOutNode(logArchive, "StageOut1")
    #WorkflowTools.addStageOutOverride(logArchive, stageOutParams['command'],
    #                                  stageOutParams['option'],
    #                                  stageOutParams['se-name'],
    #                                  stageOutParams['lfnPrefix'])

    return workflow
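
# Usage sketch (hypothetical filename; note the wf argument is currently
# unused apart from the commented-out assignment above):
#
#   logCollectSpec = createLogCollectorWorkflowSpec(procWorkflow)
#   logCollectSpec.save("LogCollect-Workflow.xml")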
def __init__(self, requestId, channel, label):
    self.requestId = requestId
    self.group = None
    self.label = label
    self.timestamp = int(time.time())
    self.channel = channel
    self.cmsswVersions = []
    self.configurations = []
    self.psetHashes = {}
    self.origCfgs = {}
    self.acquisitionEra = None
    self.processingString = None
    self.processingVersion = None
    self.conditions = None

    # Turn on use of the proper naming convention for datasets;
    # this should become the default soon so the legacy behaviour
    # can be deprecated.
    self.useProperNamingConventions = False

    self.options = {}
    self.options.setdefault('FakeHash', False)

    # Should we use another attribute for setting the output dataset
    # status in DBS?
    self.outputDatasetStatus = 'VALID'

    self.inputDataset = {}
    self.inputDataset['IsUsed'] = False
    self.inputDataset['DatasetName'] = None
    self.inputDataset['Primary'] = None
    self.inputDataset['Processed'] = None
    self.inputDataset['DataTier'] = None
    #  //
    # // Extra controls over input dataset if required
    #//
    self.inputDataset['SplitType'] = None
    self.inputDataset['SplitSize'] = None
    self.inputDataset['OnlySites'] = None
    self.inputDataset['OnlyBlocks'] = None
    self.inputDataset['OnlyClosedBlocks'] = True

    #  //
    # // Pileup Dataset controls
    #//
    self.pileupDatasets = []

    #  //
    # // Initialise basic workflow
    #//
    self.workflow = WorkflowSpec()
    self.workflowName = "%s-%s-%s" % (label, channel, requestId)
    self.workflow.setWorkflowName(self.workflowName)
    self.workflow.setRequestCategory("mc")
    self.workflow.setRequestTimestamp(self.timestamp)
    self.workflow.parameters['RequestLabel'] = self.label
    self.workflow.parameters['ProdRequestID'] = self.requestId

    self.cmsRunNode = self.workflow.payload
    self.cmsRunNode.name = "cmsRun1"
    self.cmsRunNode.type = "CMSSW"
    self.cmsRunNodes = [self.cmsRunNode]
    self.saveOutputFor = []
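
# Usage sketch (assumption: this __init__ belongs to the WorkflowMaker-style
# class it was taken from; the argument values are illustrative):
#
#   maker = WorkflowMaker(requestId="100056", channel="MinBias", label="Test")
#   # yields workflow name "Test-MinBias-100056" with request category "mc"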
#!/usr/bin/env python

import os
import pickle

from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWAPILoader import CMSSWAPILoader
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.MCPayloads.DatasetTools import getOutputDatasetsWithPSet

specfile = "/uscms/home/gutsche/CSA08-JetET110-CSA08_S43_S43_rereco_may19_PIC_v1-Workflow.xml"

rawCfgFile = "%s.raw.cfg" % os.path.basename(specfile)
origCfgFile = "%s.orig.cfg" % os.path.basename(specfile)
dbsCfgFile = "%s.dbs.cfg" % os.path.basename(specfile)

spec = WorkflowSpec()
spec.load(specfile)

rawCfg = spec.payload.cfgInterface.rawCfg
originalCfg = spec.payload.cfgInterface.originalCfg

dbsDatasets = getOutputDatasetsWithPSet(spec.payload)

handle = open(dbsCfgFile, 'w')
handle.write(dbsDatasets[0]['PSetContent'])
handle.close()

handle = open(origCfgFile, 'w')
handle.write(originalCfg)
handle.close()

# remaining constructor arguments assumed: CMSSWAPILoader is typically
# constructed as (arch, version, cmsPath); the original excerpt is cut
# off after the first argument
loader = CMSSWAPILoader(os.environ['SCRAM_ARCH'],
                        os.environ['CMSSW_VERSION'],
                        os.environ['CMS_PATH'])
def createMergeJobWorkflow(procSpec, isFastMerge=True, doCleanUp=True,
                           littleE=False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job workflows
    that can be used to generate actual merge jobs (as opposed to
    creating datasets like createMergeDatasetWorkflow)

    returns a dictionary of input (i.e. MergeSensor watched) dataset names
    to workflow spec instances

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        #  //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox != None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        #  //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        #  //
        # // Output Dataset
        #//
        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'],
            dataset['ProcessedDataset'],
            outputModuleName)
        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']
        outputDataset["ApplicationName"] = cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
            procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset

        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp == True:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        #  //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        results[inputDataset] = newWF

    return results
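
# Usage sketch (hypothetical): one merge workflow is created per watched
# input dataset, so callers typically iterate the returned mapping.
#
#   mergeWFs = createMergeJobWorkflow(procSpec, doCleanUp=True)
#   for index, (watchedDataset, mergeWF) in enumerate(mergeWFs.items()):
#       mergeWF.save("MergeWorkflow-%s.xml" % index)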