class WorkflowMaker:
    """
    _WorkflowMaker_

    Basic MC workflow maker for PR to use to create workflow spec files.
    """

    def __init__(self, requestId, channel, label):
        self.requestId = requestId
        self.group = None
        self.label = label
        self.timestamp = int(time.time())
        self.channel = channel
        self.cmsswVersions = []
        self.configurations = []
        self.psetHashes = {}
        self.origCfgs = {}
        self.acquisitionEra = None
        self.processingString = None
        self.processingVersion = None
        self.conditions = None

        # Turn on use of proper naming convention for datasets.
        # Should be made the default soon; let's deprecate all the old crap.
        self.useProperNamingConventions = False

        self.options = {}
        self.options.setdefault('FakeHash', False)

        # Should we use another attribute for setting the output dataset
        # status in DBS?
        self.outputDatasetStatus = 'VALID'

        self.inputDataset = {}
        self.inputDataset['IsUsed'] = False
        self.inputDataset['DatasetName'] = None
        self.inputDataset['Primary'] = None
        self.inputDataset['Processed'] = None
        self.inputDataset['DataTier'] = None
        # //
        # // Extra controls over input dataset if required
        #//
        self.inputDataset['SplitType'] = None
        self.inputDataset['SplitSize'] = None
        self.inputDataset['OnlySites'] = None
        self.inputDataset['OnlyBlocks'] = None
        self.inputDataset['OnlyClosedBlocks'] = True

        # //
        # // Pileup Dataset controls
        #//
        self.pileupDatasets = []

        # //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflowName = "%s-%s-%s" % (label, channel, requestId)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("mc")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters['RequestLabel'] = self.label
        self.workflow.parameters['ProdRequestID'] = self.requestId

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"
        self.cmsRunNodes = [self.cmsRunNode]
        self.saveOutputFor = []

    def chainCmsRunNode(self, stageOutIntermediates=False, *outputModules):
        """
        Append a cmsRun config to the current cmsRun node and chain them.
        """
        if stageOutIntermediates:
            # Do we want to keep cmsRunNode's products?
            self.saveOutputFor.append(self.cmsRunNode.name)
        newnode = self.cmsRunNode.newNode("cmsRun%s" %
                                          (len(self.cmsRunNodes) + 1))
        newnode.type = "CMSSW"
        if not outputModules:
            outputModules = self.configurations[-1].outputModules.keys()
        for outmodule in outputModules:
            newnode.addInputLink(self.cmsRunNode.name, outmodule, 'source',
                                 AppearStandalone=not stageOutIntermediates)
        self.cmsRunNode = newnode
        self.cmsRunNodes.append(newnode)

    def changeCategory(self, newCategory):
        """
        _changeCategory_

        Change the workflow category from the default mc that appears
        in the LFNs.
        """
        self.workflow.setRequestCategory(newCategory)
        return

    def setAcquisitionEra(self, era):
        """
        _setAcquisitionEra_

        Sets the AcquisitionEra in the workflow.
        """
        self.workflow.setAcquisitionEra(era)
        self.acquisitionEra = era
        return

    def setNamingConventionParameters(self, era, procString, procVers):
        """
        _setNamingConventionParameters_

        Sets AcquisitionEra, ProcessingString and ProcessingVersion.
        """
        self.workflow.setAcquisitionEra(era)
        self.workflow.parameters['ProcessingString'] = procString
        self.workflow.parameters['ProcessingVersion'] = procVers
        self.acquisitionEra = era
        self.processingString = procString
        self.processingVersion = procVers
        self.useProperNamingConventions = True
        return

    def setActivity(self, activity):
        """
        _setActivity_

        Set the workflow type, i.e. Simulation, Reconstruction,
        Reprocessing or Skimming.
        """
        self.workflow.setActivity(activity)
        return

    def setCMSSWVersion(self, version):
        """
        _setCMSSWVersion_

        Set the version of CMSSW to be used.
        """
        self.cmsswVersions.append(version)
        self.cmsRunNode.application['Version'] = version
        self.cmsRunNode.application['Executable'] = "cmsRun"
        self.cmsRunNode.application['Project'] = "CMSSW"
        self.cmsRunNode.application['Architecture'] = ""
        return

    def setUserSandbox(self, sandboxloc):
        """
        _setUserSandbox_

        Sets the location of the user sandbox.
        """
        self.cmsRunNode.userSandbox = sandboxloc
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Physics Group owning the workflow.
        """
        self.group = group
        self.workflow.parameters['PhysicsGroup'] = self.group
        return

    def setConfiguration(self, cfgFile, **args):
        """
        _setConfiguration_

        Provide the CMSSW configuration to be used.
        By default, assume that cfgFile is a python format string.
        The format & type can be specified using args:

        - Type : must be "file" or "string" or "instance"
        """
        cfgType = args.get("Type", "instance")

        if cfgType not in ("file", "string", "instance"):
            msg = "Illegal Type for cfg file: %s\n" % cfgType
            msg += "Should be \"file\", \"string\" or \"instance\"\n"
            raise RuntimeError(msg)

        cfgContent = cfgFile
        if cfgType == "file":
            cfgContent = open(cfgFile).read()
            cfgType = "string"

        if cfgType == "string":
            cfgData = cfgContent
            cfgContent = CMSSWConfig()
            cfgContent.unpack(cfgData)

        self.cmsRunNode.cfgInterface = cfgContent
        self.configurations.append(cfgContent)
        return

    def setOriginalCfg(self, honkingGreatString):
        """
        _setOriginalCfg_

        Set the original cfg file content that is to be recorded in DBS.

        CALL THIS METHOD AFTER setConfiguration
        """
        sep = '\n\n### Next chained config file ###\n\n'
        cfg = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_config = self.origCfgs.get(link['InputNode'], '')
                if prev_config:
                    cfg = '%s%s%s' % (cfg, prev_config, sep)
        cfg = '%s%s' % (cfg, honkingGreatString)
        self.cmsRunNode.cfgInterface.originalCfg = cfg
        self.origCfgs[self.cmsRunNode.name] = cfg
        return

    def setPSetHash(self, hashValue):
        """
        _setPSetHash_

        Set the value for the PSetHash.
        If any InputLinks are present, their pset hashes are prepended.
        """
        psetHash = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_node_hash = self.psetHashes.get(link['InputNode'], None)
                if prev_node_hash:  # cmsGen nodes will be missing
                    psetHash = '%s%s_' % (psetHash, prev_node_hash)
        psetHash = '%s%s' % (psetHash, hashValue)
        self.psetHashes[self.cmsRunNode.name] = psetHash
        return

    def addInputDataset(self, datasetPath):
        """
        _addInputDataset_

        If this workflow processes a dataset, set that here.

        NOTE: It is possible to also specify
        - Split Type (file or event)
        - Split Size (int)
        - input DBS
        Not sure how many of these we want to use.
        For now, they can be added to the inputDataset dictionary.
        """
        datasetBits = DatasetConventions.parseDatasetPath(datasetPath)
        self.inputDataset.update(datasetBits)
        self.inputDataset['IsUsed'] = True
        self.inputDataset['DatasetName'] = datasetPath
        return

    def addPileupDataset(self, datasetName, filesPerJob=10,
                         targetModule=None):
        """
        _addPileupDataset_

        Add a dataset to provide pileup overlap.
        filesPerJob should be 1 in 99.9% of cases.
        """
        pileupDataset = {}
        pileupDataset['Primary'] = None
        pileupDataset['Processed'] = None
        pileupDataset['DataTier'] = None
        datasetBits = DatasetConventions.parseDatasetPath(datasetName)
        pileupDataset.update(datasetBits)
        pileupDataset['FilesPerJob'] = filesPerJob
        # Target module could be 'MixingModule' or 'DataMixingModule' for
        # the moment. If None, MixingModule will be used.
        pileupDataset['TargetModule'] = targetModule
        self.pileupDatasets.append(pileupDataset)
        return

    def addFinalDestination(self, *phedexNodeNames):
        """
        _addFinalDestination_

        Add a final destination that can be used to generate a PhEDEx
        subscription so that the data gets transferred to some final
        location.

        NOTE: Do we want to support a list of PhEDEx nodes?
        E.g. CERN + FNAL
        """
        nameList = ",".join(phedexNodeNames)
        self.workflow.parameters['PhEDExDestination'] = nameList
        return

    def addSelectionEfficiency(self, selectionEff):
        """
        _addSelectionEfficiency_

        Do we have a selection efficiency?
        """
        self.cmsRunNode.applicationControls["SelectionEfficiency"] = \
            selectionEff
        return

    def setOutputDatasetDbsStatus(self, status):
        """
        _setOutputDatasetDbsStatus_

        The output datasets will have this status in the field
        dataset.status. This value will be used when registering the
        output dataset in DBS. Only two values are accepted:
        - VALID
        - PRODUCTION
        """
        if status in ('VALID', 'PRODUCTION'):
            self.outputDatasetStatus = status
        return

    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when done.
        """
        self._Validate()

        # //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1",
                                      *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        # //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'],
                self.inputDataset['Processed'])
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                    'SplitType',
                    'SplitSize',
                    'OnlySites',
                    'OnlyBlocks',
                    'OnlyClosedBlocks',
            ]:
                if self.inputDataset[keyname] is not None:
                    self.workflow.parameters[keyname] = \
                        self.inputDataset[keyname]

        # //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'],
                pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']

        # //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

            # Ignore nodes that don't save any output. But keep input
            # dataset in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                        cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                        self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down
                        # the dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink
                            # exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (
                                    inputLink['InputNode'],
                                    inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out,
                        # then use its parent.
                        else:
                            # TODO: Won't work if more than one InputLink
                            # exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue

            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                    PhysicsChannel=self.channel)

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join(
                            (self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = \
                        DatasetConventions.properProcessedDatasetName(
                            AcquisitionEra=self.acquisitionEra,
                            ProcessingString=processingString,
                            ProcessingVersion=self.processingVersion,
                            Unmerged=True)
                elif self.acquisitionEra is None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version=cmsRunNode.application['Version'],
                        Label=self.label,
                        Group=self.group,
                        FilterName=filterName,
                        RequestId=self.requestId,
                        Unmerged=True)
                else:
                    processedName = \
                        DatasetConventions.csa08ProcessedDatasetName(
                            AcquisitionEra=self.acquisitionEra,
                            Conditions=self.workflow.parameters['Conditions'],
                            ProcessingVersion=self.workflow.parameters[
                                'ProcessingVersion'],
                            FilterName=filterName,
                            Unmerged=True)

                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName

                outDS = cmsRunNode.addOutputDataset(primaryName,
                                                    processedName,
                                                    outModName)

                outDS['Status'] = self.outputDatasetStatus
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                    cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group

                # check for input dataset for first node
                if self.inputDataset['IsUsed'] and \
                        cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink
                            # exists
                            outDS['ParentDataset'] = datasets[
                                '%s:%s' % (inputLink['InputNode'],
                                           inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                        (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                    "/%s/%s/%s" % (outDS['PrimaryDataset'],
                                   outDS['ProcessedDataset'],
                                   outDS['DataTier'])

        # optionally remap sibling relationships to parent-child
        # (e.g. HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow

    def _Validate(self):
        """
        _Validate_

        Private method to test that all options are set.
        Throws a WorkflowMakerError if any problems are found.
        """
        notNoneAttrs = [
            "requestId",
            "label",
            "group",
            "channel",
        ]
        for attrName in notNoneAttrs:
            value = getattr(self, attrName, None)
            if value is None:
                msg = "Attribute Not Set: %s" % attrName
                raise WorkflowMakerError(msg)

        if not len(self.configurations):
            msg = "Attribute Not Set: configurations"
            raise WorkflowMakerError(msg)

        if len(self.configurations) != len(self.cmsswVersions):
            msg = "len(self.configurations) != len(self.cmsswVersions)"
            raise WorkflowMakerError(msg)

        return
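

# ----------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original
# module). The request id, channel, label, CMSSW version, PSet hash and
# cfg path below are hypothetical placeholders, and WorkflowSpec.save()
# is assumed to be available for persisting the spec. Note that
# _Validate() requires the physics group to be set and one CMSSW version
# per configuration.
#
#   maker = WorkflowMaker(requestId="100001", channel="ZJets", label="Test")
#   maker.setPhysicsGroup("Generators")
#   maker.setCMSSWVersion("CMSSW_X_Y_Z")
#   maker.setConfiguration("/path/to/config.py", Type="file")
#   maker.setOriginalCfg(open("/path/to/config.py").read())
#   maker.setPSetHash("0123456789abcdef")
#   spec = maker.makeWorkflow()
#   spec.save("%s-Workflow.xml" % maker.workflowName)
#
# For multi-step workflows, call chainCmsRunNode() after the first
# configuration, then repeat setCMSSWVersion/setConfiguration (and
# setOriginalCfg/setPSetHash) for each additional step before calling
# makeWorkflow().
# ----------------------------------------------------------------------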