def validateDataset(datasetPath, dbsUrl):
    """
    _validateDataset_

    Util method to check that the datasetPath provided exists
    in the dbsUrl provided

    """
    datasetDetails = DatasetConventions.parseDatasetPath(datasetPath)
    for key in ['Primary', 'DataTier', 'Processed']:
        if datasetDetails[key] == None:
            msg = "Invalid Dataset Name: \n ==> %s\n" % datasetPath
            msg += "Does not contain %s information" % key
            raise WorkflowMakerError(msg)

    datasets = []
    try:
        reader = DBSReader(dbsUrl)
        datasets = reader.matchProcessedDatasets(
            datasetDetails['Primary'],
            datasetDetails['DataTier'],
            datasetDetails['Processed'])
    except Exception, ex:
        msg = "Error calling DBS to validate dataset:\n%s\n" % datasetPath
        msg += str(ex)
        raise WorkflowMakerError(msg)
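# Illustrative usage sketch (not part of the original module): validateDataset
# expects a full /Primary/Processed/DataTier path plus a DBS endpoint URL and
# raises WorkflowMakerError on a malformed path or a DBS failure. The helper
# name and dataset path below are hypothetical; the DBS URL is the global one
# used by createHarvestingWorkflow further down.
def _exampleValidateDataset():
    dbsUrl = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    try:
        validateDataset("/ExamplePrimary/ExampleProcessed-v1/RECO", dbsUrl)
    except WorkflowMakerError, ex:
        print "Dataset failed validation: %s" % str(ex)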
def addInputDataset(self, datasetPath):
    """
    _addInputDataset_

    If this workflow processes a dataset, set that here

    NOTE: It is possible to also specify
        - Split Type (file or event)
        - Split Size (int)
        - input DBS
    Not sure how many of these we want to use.
    For now, they can be added to the inputDataset dictionary

    """
    datasetBits = DatasetConventions.parseDatasetPath(datasetPath)
    self.inputDataset.update(datasetBits)
    self.inputDataset['IsUsed'] = True
    self.inputDataset['DatasetName'] = datasetPath
    return
def addPileupDataset(self, datasetName, filesPerJob=10, targetModule=None):
    """
    _addPileupDataset_

    Add a dataset to provide pileup overlap.
    filesPerJob should be 1 in 99.9 % of cases

    """
    pileupDataset = {}
    pileupDataset['Primary'] = None
    pileupDataset['Processed'] = None
    pileupDataset['DataTier'] = None
    datasetBits = DatasetConventions.parseDatasetPath(datasetName)
    pileupDataset.update(datasetBits)
    pileupDataset['FilesPerJob'] = filesPerJob
    # Target module could be 'MixingModule' or 'DataMixingModule' for
    # the moment. If None, MixingModule will be used.
    pileupDataset['TargetModule'] = targetModule
    self.pileupDatasets.append(pileupDataset)
    return
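# Illustrative usage sketch (not part of the original module), assuming the two
# methods above belong to a WorkflowMaker-style instance passed in as `maker`.
# The helper name, dataset paths and pileup settings are placeholders.
def _exampleAddDatasets(maker):
    maker.addInputDataset("/ExamplePrimary/ExampleProcessed-v1/GEN-SIM-RAW")
    maker.addPileupDataset("/ExampleMinBias/ExamplePU-v1/GEN-SIM-DIGI-RAW",
                           filesPerJob=1,
                           targetModule="MixingModule")
    return maker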
def makeWorkflow(self):
    """
    _makeWorkflow_

    Call this method to create the workflow spec instance when
    done

    """
    self._Validate()

    #  //
    # // Add Stage Out node
    #//
    self.saveOutputFor.append(self.cmsRunNode.name)
    WorkflowTools.addStageOutNode(self.cmsRunNode,
                                  "stageOut1", *self.saveOutputFor)
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

    #  //
    # // Input Dataset?
    #//
    if self.inputDataset['IsUsed']:
        inputDataset = self.cmsRunNodes[0].addInputDataset(
            self.inputDataset['Primary'],
            self.inputDataset['Processed'])
        inputDataset["DataTier"] = self.inputDataset['DataTier']
        for keyname in ['SplitType',
                        'SplitSize',
                        'OnlySites',
                        'OnlyBlocks',
                        'OnlyClosedBlocks',
                        ]:
            if self.inputDataset[keyname] != None:
                self.workflow.parameters[keyname] = self.inputDataset[keyname]

    #  //
    # // Pileup Datasets?
    #//
    for pileupDataset in self.pileupDatasets:
        puDataset = self.cmsRunNodes[0].addPileupDataset(
            pileupDataset['Primary'],
            pileupDataset['DataTier'],
            pileupDataset['Processed'])
        puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
        if pileupDataset['TargetModule'] is not None:
            puDataset['TargetModule'] = pileupDataset['TargetModule']

    #  //
    # // Extract dataset info from cfg
    #//
    datasets = {}
    datasetsToForward = {}
    for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

        # Ignore nodes that don't save any output. But keep input dataset
        # in case we need to forward it.
        if cmsRunNode.name not in self.saveOutputFor:
            # Store parent dataset in case we need to forward it.
            if self.inputDataset['IsUsed'] and \
                    cmsRunNode == self.cmsRunNodes[0]:
                datasetsToForward[cmsRunNode.name] = \
                    self.inputDataset['DatasetName']
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    # If the previous cmsRunNode stages out, pull down the
                    # dataset it produced.
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    # If the previous cmsRunNode does not stage out, then
                    # use its parent.
                    else:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasetsToForward[inputLink['InputNode']]
            continue

        for outModName in config.outputModules.keys():
            moduleInstance = config.getOutputModule(outModName)
            dataTier = moduleInstance['dataTier']
            filterName = moduleInstance["filterName"]
            primaryName = DatasetConventions.primaryDatasetName(
                PhysicsChannel=self.channel,
                )

            if self.useProperNamingConventions:
                if self.processingString and filterName:
                    processingString = "_".join(
                        (self.processingString, filterName))
                elif self.processingString:
                    processingString = self.processingString
                elif filterName:
                    processingString = filterName
                else:
                    processingString = None
                processedName = DatasetConventions.properProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    ProcessingString=processingString,
                    ProcessingVersion=self.processingVersion,
                    Unmerged=True)
            elif self.acquisitionEra == None:
                processedName = DatasetConventions.processedDatasetName(
                    Version=cmsRunNode.application['Version'],
                    Label=self.label,
                    Group=self.group,
                    FilterName=filterName,
                    RequestId=self.requestId,
                    Unmerged=True)
            else:
                processedName = DatasetConventions.csa08ProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    Conditions=self.workflow.parameters['Conditions'],
                    ProcessingVersion=self.workflow.parameters['ProcessingVersion'],
                    FilterName=filterName,
                    Unmerged=True)

            dataTier = DatasetConventions.checkDataTier(dataTier)

            moduleInstance['primaryDataset'] = primaryName
            moduleInstance['processedDataset'] = processedName

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outModName)

            outDS['Status'] = self.outputDatasetStatus
            outDS['DataTier'] = dataTier
            outDS["ApplicationName"] = \
                cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outModName
            outDS["PhysicsGroup"] = self.group

            # check for input dataset for first node
            if self.inputDataset['IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                outDS['ParentDataset'] = self.inputDataset['DatasetName']
            # check for staged out intermediates
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        outDS['ParentDataset'] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    elif datasetsToForward.get(
                            inputLink['InputNode']) is not None:
                        outDS['ParentDataset'] = \
                            datasetsToForward[inputLink['InputNode']]

            if self.options['FakeHash']:
                guid = makeUUID()
                outDS['PSetHash'] = "hash=%s;guid=%s" % \
                    (self.psetHashes[cmsRunNode.name], guid)
            else:
                outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

            # record output in case used as input to a later node
            datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                "/%s/%s/%s" % (outDS['PrimaryDataset'],
                               outDS['ProcessedDataset'],
                               outDS['DataTier'])

    # optionally remap sibling relationships to parent-child (i.e. HLTDEBUG)
    remapParentageForWorkflow(self.workflow)
    WorkflowTools.generateFilenames(self.workflow)

    return self.workflow
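# Illustrative usage sketch (not part of the original module) of the call
# sequence around makeWorkflow, mirroring how createHarvestingWorkflow below
# drives WorkflowMaker. `maker` is assumed to be a configured WorkflowMaker and
# `cfgWrapper` a prepared configuration wrapper; the version, group and dataset
# path are placeholders.
def _exampleMakeWorkflow(maker, cfgWrapper):
    maker.setCMSSWVersion("CMSSW_X_Y_Z")
    maker.setPhysicsGroup("ExampleGroup")
    maker.setConfiguration(cfgWrapper, Type="instance")
    maker.setPSetHash("NO_HASH")
    maker.addInputDataset("/ExamplePrimary/ExampleProcessed-v1/RECO")
    return maker.makeWorkflow()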
msg = "--split-size option not provided: This is required" raise RuntimeError, msg try: splitSize = int(splitSize) except ValueError, ex: msg = "--split-size argument is not an integer: %s\n" % splitSize raise RuntimeError, msg #channel0 = DatasetConventions.parseDatasetPath(dataset)['Primary'] if channel == None: # // # // Assume same as input #// channel = DatasetConventions.parseDatasetPath(dataset)['Primary'] # // # // Checking arguments against naming conventions #// if not (re.findall("^v[0-9]+$", processingVersion)): msg = "processing_version '" + processingVersion + \ "' violates naming conventions!\n" \ "Processing version should match this regexp ^v[0-9]+$ " \ "(see https://twiki.cern.ch/twiki/bin/view/CMS/DMWMPG_PrimaryDatasets)" raise RuntimeError, msg if re.findall("[-]+", acquisitionEra):
def createMergeJobWorkflow(procSpec, isFastMerge=True, doCleanUp=True,
                           littleE=False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job
    workflows that can be used to generate actual merge jobs
    (as opposed to creating datasets like createMergeDatasetWorkflow)

    returns a dictionary of (input, IE MergeSensor watched) dataset name
    to workflow spec instances

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        #  //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox != None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        #  //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        #  //
        # // Output Dataset
        #//
        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'],
            dataset['ProcessedDataset'],
            outputModuleName)

        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']

        outputDataset["ApplicationName"] = \
            cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = \
            cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = \
            cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
            procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset

        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp == True:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        #  //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        results[inputDataset] = newWF

    return results
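# Illustrative usage sketch (not part of the original module): consume the
# dictionary returned by createMergeJobWorkflow, which maps each
# MergeSensor-watched input dataset name to its merge WorkflowSpec. The helper
# name is hypothetical; `procSpec` is an existing processing WorkflowSpec.
def _exampleMergeWorkflows(procSpec):
    mergeSpecs = createMergeJobWorkflow(procSpec, doCleanUp=True)
    for watchedDataset, mergeSpec in mergeSpecs.items():
        print "Merge workflow for %s: %s" % (watchedDataset,
                                             mergeSpec.workflowName())
    return mergeSpecs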
def createHarvestingWorkflow(dataset, site, cmsPath, scramArch,
                             cmsswVersion, globalTag, configFile=None,
                             DQMServer=None, proxyLocation=None,
                             DQMCopyToCERN=None, runNumber=None,
                             doStageOut=None):
    """
    _createHarvestingWorkflow_

    Create a Harvesting workflow to extract DQM information from
    a dataset

    Enters an essentially empty process that will be updated
    at runtime to use the harvesting cfg from the release.

    """

    datasetPieces = DatasetConventions.parseDatasetPath(dataset)

    physicsGroup = "OfflineDQM"
    category = "DQM"

    if runNumber == None:
        requestId = "OfflineDQM"
        label = "%s-%s-%s" % (datasetPieces['Primary'],
                              datasetPieces['Processed'],
                              datasetPieces['DataTier'])
        channel = "DQMHarvest"
    else:
        requestId = "%s-%s" % (datasetPieces["Primary"],
                               datasetPieces["DataTier"])
        label = "DQMHarvesting"
        channel = "Run%s" % runNumber

    logging.debug("path, arch, ver: %s, %s, %s" % (
        cmsPath, scramArch, cmsswVersion))

    if configFile != None:
        cfgWrapper = configFromFile(cmsPath, scramArch,
                                    cmsswVersion, configFile)
    else:
        cfgWrapper = configOnFly(cmsPath, scramArch, cmsswVersion)

    #  //
    # // Pass in global tag
    #//
    cfgWrapper.conditionsTag = globalTag

    maker = WorkflowMaker(requestId, channel, label)
    maker.setCMSSWVersion(cmsswVersion)
    maker.setPhysicsGroup(physicsGroup)
    maker.setConfiguration(cfgWrapper, Type="instance")
    maker.changeCategory(category)
    maker.setPSetHash("NO_HASH")
    maker.addInputDataset(dataset)
    maker.setActivity('harvesting')

    spec = maker.makeWorkflow()
    spec.parameters['WorkflowType'] = "Harvesting"
    spec.parameters['DBSURL'] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    spec.parameters['OnlySites'] = site

    if DQMServer != None:
        spec.parameters['DQMServer'] = DQMServer
    if proxyLocation != None:
        spec.parameters['proxyLocation'] = proxyLocation
    if DQMCopyToCERN != None:
        spec.parameters['DQMCopyToCERN'] = DQMCopyToCERN
    if doStageOut is not None:
        spec.parameters['DoStageOut'] = doStageOut

    spec.payload.scriptControls['PostTask'].append(
        "JobCreator.RuntimeTools.RuntimeOfflineDQM")

    if configFile == None:
        preExecScript = spec.payload.scriptControls["PreExe"]
        preExecScript.append("JobCreator.RuntimeTools.RuntimeOfflineDQMSetup")

    return spec
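# Illustrative call (not part of the original module): every argument below is a
# placeholder; in practice the dataset, site, local CMSSW installation path,
# SCRAM architecture, release and global tag come from the harvesting request.
def _exampleHarvesting():
    return createHarvestingWorkflow(
        dataset="/ExamplePrimary/ExampleProcessed-v1/RECO",
        site="T1_EXAMPLE_SITE",
        cmsPath="/path/to/cmssw/install",
        scramArch="slc5_ia32_gcc434",
        cmsswVersion="CMSSW_X_Y_Z",
        globalTag="EXAMPLE_GLOBALTAG::All",
        runNumber=123456)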
def makeWorkflow(self):
    """
    _makeWorkflow_

    Call this method to create the workflow spec instance when
    done

    """
    self._Validate()

    #  //
    # // Input Dataset required for Tier0
    #//
    inputDataset = self.cmsRunNode.addInputDataset(
        self.inputDataset['Primary'],
        self.inputDataset['Processed'])
    inputDataset["DataTier"] = self.inputDataset['DataTier']
    for keyname in ['SplitType',
                    'SplitSize',
                    'OnlySites',
                    'OnlyBlocks',
                    'OnlyClosedBlocks',
                    ]:
        if self.inputDataset[keyname] != None:
            self.workflow.parameters[keyname] = self.inputDataset[keyname]

    #  //
    # // Extract dataset info from cfg
    #//
    for outModName in self.configuration.outputModules.keys():
        moduleInstance = self.configuration.getOutputModule(outModName)

        #  //
        # // Data Tier same as input
        #//
        dataTier = self.inputDataset['DataTier']
        #  //
        # // Output primary dataset same as input primary
        #//
        primaryName = self.inputDataset['Primary']

        #  //
        # // Output processed dataset
        # // (Note we pass way more info than is used, since
        # //  conventions have a tendency to change in CMS...)
        #//
        processedName = DatasetConventions.tier0ProcessedDatasetName(
            Version=self.cmsswVersion,
            InputPrimaryDataset=self.inputDataset['Primary'],
            InputProcessedDataset=self.inputDataset['Processed'],
            Label=self.label,
            Group=self.group,
            RequestId=self.requestId,
            Unmerged=self.unmergedDataset)

        dataTier = DatasetConventions.checkDataTier(dataTier)

        moduleInstance['primaryDataset'] = primaryName
        moduleInstance['processedDataset'] = processedName

        outDS = self.cmsRunNode.addOutputDataset(primaryName,
                                                 processedName,
                                                 outModName)

        outDS['DataTier'] = dataTier
        outDS["ApplicationName"] = \
            self.cmsRunNode.application["Executable"]
        outDS["ApplicationFamily"] = outModName
        outDS["PhysicsGroup"] = self.group

        if self.inputDataset['IsUsed']:
            outDS['ParentDataset'] = self.inputDataset['DatasetName']

        if self.options['FakeHash']:
            guid = makeUUID()
            outDS['PSetHash'] = "hash=%s;guid=%s" % (self.psetHash, guid)
        else:
            outDS['PSetHash'] = self.psetHash

    #  //
    # // Add Stage Out node
    #//
    WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

    #  //
    # // generate tier0 LFN bases for this workflow
    #//
    tier0LFN = self.makeTier0LFN()

    self.workflow.parameters['MergedLFNBase'] = tier0LFN
    self.workflow.parameters['UnmergedLFNBase'] = tier0LFN

    return self.workflow
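# Illustrative check (not part of the original module): the Tier0 variant above
# sets both LFN bases from the same makeTier0LFN() result, which the sketch
# below simply verifies. `tier0Maker` stands for an instance of the surrounding
# Tier0 workflow maker class; the helper name is hypothetical.
def _exampleTier0LFNBases(tier0Maker):
    spec = tier0Maker.makeWorkflow()
    assert spec.parameters['MergedLFNBase'] == spec.parameters['UnmergedLFNBase']
    return spec.parameters['MergedLFNBase']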