def makeWorkflow(self):
    """
    _makeWorkflow_

    Call this method to create the workflow spec instance when done

    """
    self._Validate()

    #  //
    # // Add Stage Out node
    #//
    self.saveOutputFor.append(self.cmsRunNode.name)
    WorkflowTools.addStageOutNode(self.cmsRunNode,
                                  "stageOut1", *self.saveOutputFor)
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

    #  //
    # // Input Dataset?
    #//
    if self.inputDataset['IsUsed']:
        inputDataset = self.cmsRunNodes[0].addInputDataset(
            self.inputDataset['Primary'],
            self.inputDataset['Processed']
        )
        inputDataset["DataTier"] = self.inputDataset['DataTier']
        for keyname in [
            'SplitType',
            'SplitSize',
            'OnlySites',
            'OnlyBlocks',
            'OnlyClosedBlocks',
        ]:
            if self.inputDataset[keyname] is not None:
                self.workflow.parameters[keyname] = self.inputDataset[keyname]

    #  //
    # // Pileup Datasets?
    #//
    for pileupDataset in self.pileupDatasets:
        puDataset = self.cmsRunNodes[0].addPileupDataset(
            pileupDataset['Primary'],
            pileupDataset['DataTier'],
            pileupDataset['Processed'])
        puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
        if pileupDataset['TargetModule'] is not None:
            puDataset['TargetModule'] = pileupDataset['TargetModule']

    #  //
    # // Extract dataset info from cfg
    #//
    datasets = {}
    datasetsToForward = {}
    for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

        # Ignore nodes that don't save any output, but keep the input
        # dataset in case we need to forward it.
        if cmsRunNode.name not in self.saveOutputFor:
            # Store parent dataset in case we need to forward it.
            if self.inputDataset['IsUsed'] and \
                    cmsRunNode == self.cmsRunNodes[0]:
                datasetsToForward[cmsRunNode.name] = \
                    self.inputDataset['DatasetName']
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    # If the previous cmsRunNode stages out, pull down
                    # the dataset it produced.
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    # If the previous cmsRunNode does not stage out,
                    # use its parent instead.
                    else:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasetsToForward[inputLink['InputNode']]
            continue

        for outModName in config.outputModules.keys():
            moduleInstance = config.getOutputModule(outModName)
            dataTier = moduleInstance['dataTier']
            filterName = moduleInstance["filterName"]
            primaryName = DatasetConventions.primaryDatasetName(
                PhysicsChannel=self.channel,
            )

            if self.useProperNamingConventions:
                if self.processingString and filterName:
                    processingString = "_".join(
                        (self.processingString, filterName))
                elif self.processingString:
                    processingString = self.processingString
                elif filterName:
                    processingString = filterName
                else:
                    processingString = None
                processedName = DatasetConventions.properProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    ProcessingString=processingString,
                    ProcessingVersion=self.processingVersion,
                    Unmerged=True
                )
            elif self.acquisitionEra is None:
                processedName = DatasetConventions.processedDatasetName(
                    Version=cmsRunNode.application['Version'],
                    Label=self.label,
                    Group=self.group,
                    FilterName=filterName,
                    RequestId=self.requestId,
                    Unmerged=True
                )
            else:
                processedName = DatasetConventions.csa08ProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    Conditions=self.workflow.parameters['Conditions'],
                    ProcessingVersion=self.workflow.parameters['ProcessingVersion'],
                    FilterName=filterName,
                    Unmerged=True
                )

            dataTier = DatasetConventions.checkDataTier(dataTier)

            moduleInstance['primaryDataset'] = primaryName
            moduleInstance['processedDataset'] = processedName

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outModName)

            outDS['Status'] = self.outputDatasetStatus
            outDS['DataTier'] = dataTier
            outDS["ApplicationName"] = cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outModName
            outDS["PhysicsGroup"] = self.group

            # check for input dataset for the first node
            if self.inputDataset['IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                outDS['ParentDataset'] = self.inputDataset['DatasetName']
            # check for staged-out intermediates
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        outDS['ParentDataset'] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    elif datasetsToForward.get(inputLink['InputNode']) is not None:
                        outDS['ParentDataset'] = \
                            datasetsToForward[inputLink['InputNode']]

            if self.options['FakeHash']:
                guid = makeUUID()
                outDS['PSetHash'] = "hash=%s;guid=%s" % \
                    (self.psetHashes[cmsRunNode.name], guid)
            else:
                outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

            # record the output in case it is used as input to a later node
            datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                "/%s/%s/%s" % (outDS['PrimaryDataset'],
                               outDS['ProcessedDataset'],
                               outDS['DataTier'])

    # optionally remap sibling relationships to parent-child (e.g. HLTDEBUG)
    remapParentageForWorkflow(self.workflow)
    WorkflowTools.generateFilenames(self.workflow)

    return self.workflow
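# ---------------------------------------------------------------------------
# Illustration only, not part of the class above: a minimal, self-contained
# sketch of the parentage bookkeeping that makeWorkflow() performs. Nodes
# that stage out publish their output dataset in `datasets`; nodes that do
# not are skipped, and the dataset feeding them is carried through
# `datasetsToForward` so a later staged-out node still gets the correct
# ParentDataset. The node dicts, key names, and dataset path format below
# are hypothetical simplifications of the real cmsRunNode/InputLink objects.

def _sketchResolveParentage(chain):
    """
    chain: ordered list of dicts with keys 'name', 'savesOutput', and
    'inputNode' (name of the upstream node, or None for the first node).
    Returns {nodeName: parentDatasetPath} for every node that saves output.
    """
    datasets = {}           # output dataset published by each staged-out node
    datasetsToForward = {}  # dataset carried past nodes that do not stage out
    parents = {}
    for node in chain:
        upstream = node['inputNode']
        # dataset feeding this node: either a staged-out dataset or one
        # forwarded past a transient (non-staging) node
        feed = datasets.get(upstream, datasetsToForward.get(upstream))
        if not node['savesOutput']:
            datasetsToForward[node['name']] = feed
            continue
        parents[node['name']] = feed
        datasets[node['name']] = "/Primary/%s-unmerged/TIER" % node['name']
    return parents

# Example: with step1 -> step2 -> step3, where step2 does not stage out,
# step3's parent resolves to step1's output dataset:
#
#   _sketchResolveParentage([
#       {'name': 'step1', 'savesOutput': True,  'inputNode': None},
#       {'name': 'step2', 'savesOutput': False, 'inputNode': 'step1'},
#       {'name': 'step3', 'savesOutput': True,  'inputNode': 'step2'},
#   ])
#   # -> {'step1': None, 'step3': '/Primary/step1-unmerged/TIER'}
# ---------------------------------------------------------------------------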
def makeWorkflow(self):
    """
    _makeWorkflow_

    Call this method to create the workflow spec instance when done

    """
    self._Validate()

    #  //
    # // Input Dataset required for Tier0
    #//
    inputDataset = self.cmsRunNode.addInputDataset(
        self.inputDataset['Primary'],
        self.inputDataset['Processed']
    )
    inputDataset["DataTier"] = self.inputDataset['DataTier']
    for keyname in [
        'SplitType',
        'SplitSize',
        'OnlySites',
        'OnlyBlocks',
        'OnlyClosedBlocks',
    ]:
        if self.inputDataset[keyname] is not None:
            self.workflow.parameters[keyname] = self.inputDataset[keyname]

    #  //
    # // Extract dataset info from cfg
    #//
    for outModName in self.configuration.outputModules.keys():
        moduleInstance = self.configuration.getOutputModule(outModName)

        #  //
        # // Data tier same as input
        #//
        dataTier = self.inputDataset['DataTier']
        #  //
        # // Output primary dataset same as input primary
        #//
        primaryName = self.inputDataset['Primary']

        #  //
        # // Output processed dataset. (Note: we pass far more info
        # //  than is used, since naming conventions have a tendency
        # //  to change in CMS...)
        #//
        processedName = DatasetConventions.tier0ProcessedDatasetName(
            Version=self.cmsswVersion,
            InputPrimaryDataset=self.inputDataset['Primary'],
            InputProcessedDataset=self.inputDataset['Processed'],
            Label=self.label,
            Group=self.group,
            RequestId=self.requestId,
            Unmerged=self.unmergedDataset
        )

        dataTier = DatasetConventions.checkDataTier(dataTier)

        moduleInstance['primaryDataset'] = primaryName
        moduleInstance['processedDataset'] = processedName

        outDS = self.cmsRunNode.addOutputDataset(primaryName,
                                                 processedName,
                                                 outModName)

        outDS['DataTier'] = dataTier
        outDS["ApplicationName"] = self.cmsRunNode.application["Executable"]
        outDS["ApplicationFamily"] = outModName
        outDS["PhysicsGroup"] = self.group

        if self.inputDataset['IsUsed']:
            outDS['ParentDataset'] = self.inputDataset['DatasetName']

        if self.options['FakeHash']:
            guid = makeUUID()
            outDS['PSetHash'] = "hash=%s;guid=%s" % (self.psetHash, guid)
        else:
            outDS['PSetHash'] = self.psetHash

    #  //
    # // Add Stage Out node
    #//
    WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

    #  //
    # // Generate Tier0 LFN bases for this workflow
    #//
    tier0LFN = self.makeTier0LFN()

    self.workflow.parameters['MergedLFNBase'] = tier0LFN
    self.workflow.parameters['UnmergedLFNBase'] = tier0LFN

    return self.workflow
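# ---------------------------------------------------------------------------
# Illustration only, with a hypothetical naming convention (the real one
# lives in DatasetConventions.tier0ProcessedDatasetName and takes more
# arguments): in the Tier0 case the output dataset keeps the input primary
# dataset and data tier, and only the processed dataset name is regenerated.
# A toy version of that renaming:

def _sketchTier0DatasetName(inputDatasetPath, cmsswVersion, requestId):
    """Map /Primary/Processed/TIER to a Tier0-style output dataset path."""
    _, primary, processed, tier = inputDatasetPath.split('/')
    # toy processed-name convention, for illustration only
    newProcessed = "%s-%s-%s-unmerged" % (processed, cmsswVersion, requestId)
    return "/%s/%s/%s" % (primary, newProcessed, tier)

# Example (all names hypothetical):
#
#   _sketchTier0DatasetName("/MinBias/Raw-v1/RAW", "CMSSW_2_0_8", "Req123")
#   # -> "/MinBias/Raw-v1-CMSSW_2_0_8-Req123-unmerged/RAW"
# ---------------------------------------------------------------------------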