def makeWorkflow(self):
    """
    _makeWorkflow_

    Generate a workflow. If the self.configFile parameter has been set
    this will attempt to load the config from file, otherwise it will
    create an empty process object which will get filled in by the
    runtime script.

    """
    self.timestamp = int(time.time())
    self.workflow = WorkflowSpec()
    self.workflowName = "AlcaSkim-Run%s-%s" % \
                        (self.run, self.primaryDataset)
    self.workflow.setWorkflowName(self.workflowName)
    self.workflow.setRequestCategory("data")
    self.workflow.setRequestTimestamp(self.timestamp)
    self.workflow.parameters["WorkflowType"] = "Processing"
    self.workflow.parameters["ProdRequestID"] = self.run
    self.workflow.parameters["RunNumber"] = self.run
    self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"]
    self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"]
    self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

    self.cmsRunNode = self.workflow.payload
    self.cmsRunNode.name = "cmsRun1"
    self.cmsRunNode.type = "CMSSW"
    self.cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
    self.cmsRunNode.application["Executable"] = "cmsRun"
    self.cmsRunNode.application["Project"] = "CMSSW"
    self.cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

    inputDataset = self.cmsRunNode.addInputDataset(
        self.primaryDataset, self.parentProcessedDataset)
    inputDataset["DataTier"] = "RECO"

    if self.configFile is None:
        self.loadProcessFromFramework()
    else:
        self.loadProcessFromFile()

    self.setupOutputModules()

    WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")
    WorkflowTools.generateFilenames(self.workflow)

    return self.workflow
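# Usage sketch (not from the original source): minimal driving of the
# AlcaSkim makeWorkflow() above. The maker class name and all values are
# hypothetical; the attribute names come straight from the method body,
# and save() appears in the command-line snippet further down.
#
#   maker = AlcaSkimWorkflowMaker()              # hypothetical class name
#   maker.run = 123456                           # example run number
#   maker.primaryDataset = "MinimumBias"         # example dataset
#   maker.parentProcessedDataset = "CRUZET4-v1"  # example processed name
#   maker.cmssw = {"CMSSWVersion": "CMSSW_2_1_8",       # example release
#                  "ScramArch": "slc4_ia32_gcc345",
#                  "CMSPath": "/afs/cern.ch/cms/sw"}
#   maker.configFile = None   # None -> loadProcessFromFramework()
#   spec = maker.makeWorkflow()
#   spec.save("%s-Workflow.xml" % maker.workflowName)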
def createMergeJobWorkflow(procSpec, isFastMerge=True, doCleanUp=True,
                           littleE=False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job
    workflows that can be used to generate actual merge jobs
    (as opposed to creating datasets like createMergeDatasetWorkflow)

    returns a dictionary of (input, IE MergeSensor watched) dataset name
    to workflow spec instances

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        # //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox is not None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        # //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        # //
        # // Output Dataset
        #//
        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'],
            dataset['ProcessedDataset'],
            outputModuleName)
        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']
        outputDataset["ApplicationName"] = \
            cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = \
            cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = \
            cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
            procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset

        # //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        # //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        results[inputDataset] = newWF

    return results
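# Usage sketch (not from the original source): one way to drive
# createMergeJobWorkflow above and persist each merge spec. The
# "ProcessingWorkflow.xml" path and the WorkflowSpec.load() call are
# assumptions for illustration; workflowName() and save() both appear in
# the surrounding snippets.
def exampleCreateMergeWorkflows():
    procSpec = WorkflowSpec()
    procSpec.load("ProcessingWorkflow.xml")  # assumed loader API
    # one merge workflow per MergeSensor-watched (input) dataset
    mergeSpecs = createMergeJobWorkflow(procSpec, isFastMerge=False)
    for watchedDataset, mergeSpec in mergeSpecs.items():
        mergeSpec.save("%s-Merge-Workflow.xml" % mergeSpec.workflowName())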
def makeWorkflow(self):
    """
    _makeWorkflow_

    Call this method to create the workflow spec instance when done

    """
    self._Validate()

    # //
    # // Add Stage Out node
    #//
    self.saveOutputFor.append(self.cmsRunNode.name)
    WorkflowTools.addStageOutNode(self.cmsRunNode,
                                  "stageOut1", *self.saveOutputFor)
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

    # //
    # // Input Dataset?
    #//
    if self.inputDataset['IsUsed']:
        inputDataset = self.cmsRunNodes[0].addInputDataset(
            self.inputDataset['Primary'],
            self.inputDataset['Processed'])
        inputDataset["DataTier"] = self.inputDataset['DataTier']
        for keyname in ['SplitType',
                        'SplitSize',
                        'OnlySites',
                        'OnlyBlocks',
                        'OnlyClosedBlocks']:
            if self.inputDataset[keyname] is not None:
                self.workflow.parameters[keyname] = self.inputDataset[keyname]

    # //
    # // Pileup Datasets?
    #//
    for pileupDataset in self.pileupDatasets:
        puDataset = self.cmsRunNodes[0].addPileupDataset(
            pileupDataset['Primary'],
            pileupDataset['DataTier'],
            pileupDataset['Processed'])
        puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
        if pileupDataset['TargetModule'] is not None:
            puDataset['TargetModule'] = pileupDataset['TargetModule']

    # //
    # // Extract dataset info from cfg
    #//
    datasets = {}
    datasetsToForward = {}
    for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

        # Ignore nodes that don't save any output. But keep input dataset
        # in case we need to forward it.
        if cmsRunNode.name not in self.saveOutputFor:
            # Store parent dataset in case we need to forward it.
            if self.inputDataset['IsUsed'] and \
                   cmsRunNode == self.cmsRunNodes[0]:
                datasetsToForward[cmsRunNode.name] = \
                    self.inputDataset['DatasetName']
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    # If the previous cmsRunNode stages out, pull down the
                    # dataset it produced.
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    # If the previous cmsRunNode does not stage out, then
                    # use its parent.
                    else:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasetsToForward[inputLink['InputNode']]
            continue

        for outModName in config.outputModules.keys():
            moduleInstance = config.getOutputModule(outModName)
            dataTier = moduleInstance['dataTier']
            filterName = moduleInstance["filterName"]
            primaryName = DatasetConventions.primaryDatasetName(
                PhysicsChannel=self.channel)

            if self.useProperNamingConventions:
                if self.processingString and filterName:
                    processingString = "_".join((self.processingString,
                                                 filterName))
                elif self.processingString:
                    processingString = self.processingString
                elif filterName:
                    processingString = filterName
                else:
                    processingString = None
                processedName = DatasetConventions.properProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    ProcessingString=processingString,
                    ProcessingVersion=self.processingVersion,
                    Unmerged=True)
            elif self.acquisitionEra is None:
                processedName = DatasetConventions.processedDatasetName(
                    Version=cmsRunNode.application['Version'],
                    Label=self.label,
                    Group=self.group,
                    FilterName=filterName,
                    RequestId=self.requestId,
                    Unmerged=True)
            else:
                processedName = DatasetConventions.csa08ProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    Conditions=self.workflow.parameters['Conditions'],
                    ProcessingVersion=self.workflow.parameters['ProcessingVersion'],
                    FilterName=filterName,
                    Unmerged=True)

            dataTier = DatasetConventions.checkDataTier(dataTier)

            moduleInstance['primaryDataset'] = primaryName
            moduleInstance['processedDataset'] = processedName

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outModName)

            outDS['Status'] = self.outputDatasetStatus
            outDS['DataTier'] = dataTier
            outDS["ApplicationName"] = \
                cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outModName
            outDS["PhysicsGroup"] = self.group

            # check for input dataset for first node
            if self.inputDataset['IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                outDS['ParentDataset'] = self.inputDataset['DatasetName']
            # check for staged out intermediates
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        outDS['ParentDataset'] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    elif datasetsToForward.get(
                            inputLink['InputNode']) is not None:
                        outDS['ParentDataset'] = \
                            datasetsToForward[inputLink['InputNode']]

            if self.options['FakeHash']:
                guid = makeUUID()
                outDS['PSetHash'] = "hash=%s;guid=%s" % \
                    (self.psetHashes[cmsRunNode.name], guid)
            else:
                outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

            # record output in case used as input to a later node
            datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                "/%s/%s/%s" % (outDS['PrimaryDataset'],
                               outDS['ProcessedDataset'],
                               outDS['DataTier'])

    # optionally remap sibling relationships to parent-child (i.e. HLTDEBUG)
    remapParentageForWorkflow(self.workflow)
    WorkflowTools.generateFilenames(self.workflow)

    return self.workflow
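# Illustrative sketch (not from the original source): the attributes the
# multi-node makeWorkflow() above consumes, on a hypothetical maker
# instance. The dictionary keys are taken from the method body; the values
# are made up for illustration.
#
#   maker.inputDataset['IsUsed'] = True
#   maker.inputDataset['Primary'] = "RelValTTbar"     # example value
#   maker.inputDataset['Processed'] = "CMSSW_2_1_8-v1"
#   maker.inputDataset['DataTier'] = "GEN-SIM-RECO"   # example value
#   maker.inputDataset['SplitType'] = "file"          # example value
#   maker.inputDataset['SplitSize'] = 1               # example value
#   maker.pileupDatasets = []   # each entry: Primary, DataTier, Processed,
#                               # FilesPerJob, TargetModule
#   maker.options['FakeHash'] = False   # use the real per-node PSet hashes
#   spec = maker.makeWorkflow()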
def makeWorkflowSpec(self, name, configFile, enableLazyDownload):
    """
    _makeWorkflowSpec_

    Create a workflow spec instance

    """
    # //
    # // Initialise basic workflow
    #//
    self.workflow = WorkflowSpec()
    self.workflow.setWorkflowName(name)
    self.workflow.setRequestCategory("data")
    self.workflow.setRequestTimestamp(int(time.time()))
    self.workflow.parameters["WorkflowType"] = "Repack"
    self.workflow.parameters["RequestLabel"] = name
    self.workflow.parameters["ProdRequestID"] = self.run
    self.workflow.parameters["RunNumber"] = self.run
    self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"]
    self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"]
    self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

    # runtime support for StreamerJobEntity
    self.workflow.addPythonLibrary("T0.DataStructs")

    cmsRunNode = self.workflow.payload
    cmsRunNode.name = "cmsRun1"
    cmsRunNode.type = "CMSSW"
    cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
    cmsRunNode.application["Executable"] = "cmsRun"
    cmsRunNode.application["Project"] = "CMSSW"
    cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

    # runtime express script
    cmsRunNode.scriptControls["PreExe"].append(
        "T0.ExpressInjector.RuntimeExpress")

    # build the configuration template for the workflow
    cmsRunNode.cfgInterface = self.buildConfiguration(configFile,
                                                      enableLazyDownload)
    if cmsRunNode.cfgInterface is None:
        return None

    # override global tag
    cmsRunNode.cfgInterface.conditionsTag = self.globalTag

    # generate Dataset information for workflow from cfgInterface
    for outMod, moduleInstance in \
            cmsRunNode.cfgInterface.outputModules.items():
        primaryName = moduleInstance["primaryDataset"]
        processedName = moduleInstance["processedDataset"]

        outDS = cmsRunNode.addOutputDataset(primaryName,
                                            processedName,
                                            outMod)
        outDS["DataTier"] = moduleInstance["dataTier"]
        outDS["ApplicationName"] = cmsRunNode.application["Executable"]
        outDS["ApplicationFamily"] = outMod
        outDS["PhysicsGroup"] = "Tier0"

        # generate just single LFN stub (all output is unmerged)
        # insert them into the output module and dataset info
        outDS["LFNBase"] = self.getLFN(moduleInstance,
                                       dataType='express',
                                       Unmerged=True)
        moduleInstance["LFNBase"] = outDS["LFNBase"]
        moduleInstance["logicalFileName"] = os.path.join(
            outDS["LFNBase"], "%s.root" % outMod)

    WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
    WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

    return self.workflow
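# Usage sketch (not from the original source): driving the express
# makeWorkflowSpec above and saving the result. The maker class name and
# the config file path are hypothetical; save() appears in the
# command-line snippet below, and a None return signals a bad config.
#
#   maker = ExpressWorkflowMaker()          # hypothetical class name
#   spec = maker.makeWorkflowSpec("Express-Run123456",
#                                 "express-config.py",   # example cfg file
#                                 enableLazyDownload=True)
#   if spec is not None:
#       spec.save("Express-Run123456-Workflow.xml")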
try:
    loader.load()
except Exception, ex:
    msg = "Couldn't load CMSSW libraries: %s" % ex
    raise RuntimeError, msg

loadedModule = imp.load_source(
    os.path.basename(cfgFile).replace(".py", ""), cfgFile)

cmsRunNode.cfgInterface = CMSSWConfig()
loadedConfig = cmsRunNode.cfgInterface.loadConfiguration(loadedModule.process)
loadedConfig.validateForProduction()

loader.unload()

# generate Dataset information for workflow from cfgInterface
for moduleName, outMod in cmsRunNode.cfgInterface.outputModules.items():
    outMod["LFNBase"] = lfnbase
    outMod["logicalFileName"] = os.path.join(
        lfnbase, "%s.root" % moduleName)

WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")

workflow.save("%s-Workflow.xml" % workflowName)

print "Created: %s-Workflow.xml" % workflowName
print "From: %s" % cfgFile
def makeWorkflow(self):
    """
    _makeWorkflow_

    Call this method to create the workflow spec instance when done

    """
    self._Validate()

    # //
    # // Input Dataset required for Tier0
    #//
    inputDataset = self.cmsRunNode.addInputDataset(
        self.inputDataset['Primary'],
        self.inputDataset['Processed'])
    inputDataset["DataTier"] = self.inputDataset['DataTier']
    for keyname in ['SplitType',
                    'SplitSize',
                    'OnlySites',
                    'OnlyBlocks',
                    'OnlyClosedBlocks']:
        if self.inputDataset[keyname] is not None:
            self.workflow.parameters[keyname] = self.inputDataset[keyname]

    # //
    # // Extract dataset info from cfg
    #//
    for outModName in self.configuration.outputModules.keys():
        moduleInstance = self.configuration.getOutputModule(outModName)

        # //
        # // Data Tier same as input
        #//
        dataTier = self.inputDataset['DataTier']
        # //
        # // Output primary dataset same as input primary
        #//
        primaryName = self.inputDataset['Primary']

        # //
        # // Output processed dataset
        #// (Note we pass way more info than is used, since
        # // conventions have a tendency to change in CMS...)
        #//
        processedName = DatasetConventions.tier0ProcessedDatasetName(
            Version=self.cmsswVersion,
            InputPrimaryDataset=self.inputDataset['Primary'],
            InputProcessedDataset=self.inputDataset['Processed'],
            Label=self.label,
            Group=self.group,
            RequestId=self.requestId,
            Unmerged=self.unmergedDataset)

        dataTier = DatasetConventions.checkDataTier(dataTier)

        moduleInstance['primaryDataset'] = primaryName
        moduleInstance['processedDataset'] = processedName

        outDS = self.cmsRunNode.addOutputDataset(primaryName,
                                                 processedName,
                                                 outModName)

        outDS['DataTier'] = dataTier
        outDS["ApplicationName"] = \
            self.cmsRunNode.application["Executable"]
        outDS["ApplicationFamily"] = outModName
        outDS["PhysicsGroup"] = self.group

        if self.inputDataset['IsUsed']:
            outDS['ParentDataset'] = self.inputDataset['DatasetName']

        if self.options['FakeHash']:
            guid = makeUUID()
            outDS['PSetHash'] = "hash=%s;guid=%s" % (self.psetHash, guid)
        else:
            outDS['PSetHash'] = self.psetHash

    # //
    # // Add Stage Out node
    #//
    WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

    # //
    # // generate tier0 LFN bases for this workflow
    #//
    tier0LFN = self.makeTier0LFN()

    self.workflow.parameters['MergedLFNBase'] = tier0LFN
    self.workflow.parameters['UnmergedLFNBase'] = tier0LFN

    return self.workflow
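# Usage sketch (not from the original source): attributes the Tier0
# makeWorkflow() above reads, on a hypothetical maker instance. The keys
# are taken from the method body; the values are illustrative only.
#
#   maker.inputDataset['Primary'] = "MinimumBias"    # example value
#   maker.inputDataset['Processed'] = "Run123456"    # example value
#   maker.inputDataset['DataTier'] = "RAW"           # example value
#   maker.options['FakeHash'] = True   # PSetHash becomes "hash=...;guid=..."
#   spec = maker.makeWorkflow()
#   # both LFN base parameters point at the same Tier0 LFN:
#   spec.parameters['MergedLFNBase'] == spec.parameters['UnmergedLFNBase']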
def makeWorkflowSpec(self, name, enableLazyDownload, configFile=None):
    """
    _makeWorkflowSpec_

    Create a workflow spec instance

    """
    self.workflow = WorkflowSpec()
    self.workflow.setWorkflowName(name)
    self.workflow.setRequestCategory("data")
    self.workflow.setRequestTimestamp(int(time.time()))
    self.workflow.parameters["WorkflowType"] = "Repack"
    self.workflow.parameters["RequestLabel"] = name
    self.workflow.parameters["ProdRequestID"] = self.run
    self.workflow.parameters["RunNumber"] = self.run
    self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"]
    self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"]
    self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

    cmsRunNode = self.workflow.payload
    cmsRunNode.name = "cmsRun1"
    cmsRunNode.type = "CMSSW"
    cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
    cmsRunNode.application["Executable"] = "cmsRun"
    cmsRunNode.application["Project"] = "CMSSW"
    cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

    # runtime express merge script
    cmsRunNode.scriptControls["PreExe"].append(
        "T0.ExpressMerger.RuntimeExpressMerger")

    # build the configuration template for the workflow
    cmsRunNode.cfgInterface = self.buildConfiguration(enableLazyDownload,
                                                      configFile)
    if cmsRunNode.cfgInterface is None:
        return None

    # generate Dataset information for workflow from cfgInterface
    for outMod, moduleInstance in \
            cmsRunNode.cfgInterface.outputModules.items():
        primaryName = moduleInstance["primaryDataset"]
        processedName = moduleInstance["processedDataset"]

        outDS = cmsRunNode.addOutputDataset(primaryName,
                                            processedName,
                                            outMod)
        outDS["DataTier"] = moduleInstance["dataTier"]
        outDS["ApplicationName"] = cmsRunNode.application["Executable"]
        outDS["ApplicationFamily"] = outMod
        outDS["PhysicsGroup"] = "Tier0"

        # generate just single LFN stub (all output is merged)
        # insert them into the output module and dataset info
        outDS["LFNBase"] = self.getLFN(moduleInstance, dataType="express")
        moduleInstance["LFNBase"] = outDS["LFNBase"]
        moduleInstance["logicalFileName"] = os.path.join(
            outDS["LFNBase"], "%s.root" % outMod)

    WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
    WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

    # override stageout
    #
    # FIXME: This hardcodes the TFC LFN prefix !!!
    #
    ## if svcClass != None:
    ##     finder = NodeFinder("stageOut1")
    ##     self.workflow.payload.operate(finder)
    ##     node = finder.result
    ##     WorkflowTools.addStageOutOverride(node,
    ##                                       "rfcp",
    ##                                       "",
    ##                                       "srm-cms.cern.ch",
    ##                                       "rfio:///castor?svcClass=%s&path=/castor/cern.ch/cms" % svcClass)

    return self.workflow
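# Usage sketch (not from the original source): the express-merge variant
# of makeWorkflowSpec above, mirroring the express case. The maker class
# name is hypothetical; configFile=None falls back to the built-in
# configuration template, and a None return signals a bad config.
#
#   maker = ExpressMergeWorkflowMaker()     # hypothetical class name
#   spec = maker.makeWorkflowSpec("ExpressMerge-Run123456",
#                                 enableLazyDownload=False)
#   if spec is not None:
#       spec.save("ExpressMerge-Run123456-Workflow.xml")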