def addOutputModule(self, parentTask, outputModuleName, dataTier, filterName, stepName="cmsRun1"):
    """
    _addOutputModule_

    Register an output module on one step of the given processing task.

    For a task named 'Analysis' the data tier is forced to 'USER', no
    processed dataset name is set, the unmerged LFN base is taken from
    self.userUnmergedLFN and there is no merged LFN base.  For every other
    task the processed dataset name and both LFN bases are assembled from the
    acquisition era, the optional filter name, the processing version and the
    input primary dataset, and both LFN bases are passed through lfnBase().

    NOTE(review): this block arrived with its indentation collapsed.  The two
    lfnBase() calls are assumed to belong to the non-Analysis branch, because
    the Analysis branch sets mergedLFN to None -- confirm against the
    original layout.

    parentTask: task object providing name() and getStep().
    outputModuleName: name under which the module is registered.
    dataTier: data tier of the output (replaced by 'USER' for 'Analysis').
    filterName: optional filter name; None or "" means "no filter".
    stepName: name of the step the module is attached to.

    Returns a dict with the keys "dataTier", "processedDataset" and
    "filterName" describing what was registered.
    """
    if parentTask.name() == 'Analysis':
        # TODO in case of user data need to implement policy to define
        #  1 processedDatasetName
        #  2 primaryDatasetName
        #  ( 3 dataTier should be always 'USER'.)
        #  4 then we'll know how to deal with Merge
        dataTier = 'USER'
        processedDatasetName = None
        unmergedLFN = self.userUnmergedLFN
        mergedLFN = None
    else:
        # The processing string is the last LFN directory and also the tail
        # of the processed dataset name.
        if filterName not in (None, ""):
            processingString = "%s-%s" % (filterName, self.processingVersion)
        else:
            processingString = "%s" % (self.processingVersion)
        processedDatasetName = "%s-%s" % (self.acquisitionEra, processingString)

        lfnTemplate = "%s/%s/%s/%s/%s"
        unmergedLFN = lfnTemplate % (self.unmergedLFNBase, self.acquisitionEra,
                                     self.inputPrimaryDataset, dataTier,
                                     processingString)
        mergedLFN = lfnTemplate % (self.mergedLFNBase, self.acquisitionEra,
                                   self.inputPrimaryDataset, dataTier,
                                   processingString)
        lfnBase(unmergedLFN)
        lfnBase(mergedLFN)

    stepHelper = parentTask.getStep(stepName).getTypeHelper()
    stepHelper.addOutputModule(outputModuleName,
                               primaryDataset=self.inputPrimaryDataset,
                               processedDataset=processedDatasetName,
                               dataTier=dataTier,
                               filterName=filterName,
                               lfnBase=unmergedLFN,
                               mergedLFNBase=mergedLFN)

    return {"dataTier": dataTier,
            "processedDataset": processedDatasetName,
            "filterName": filterName}
def addOutputModule(self, parentTask, outputModuleName, primaryDataset, dataTier, filterName,
                    stepName="cmsRun1", forceMerged=False, forceUnmerged=False):
    """
    _addOutputModule_

    Add an output module to the given processing task.

    The processed dataset name is
    "<acquisitionEra>-[<filterName>-][<processingString>-]v<processingVersion>".
    The unmerged and merged LFN bases are built from the corresponding
    self.*LFNBase, the acquisition era, the primary dataset, the data tier,
    the filter name and the processing string/version.  For non-analysis
    tasks a positive self.runNumber adds a zero-padded "NNN/NNN/NNN" suffix.
    Every LFN base that is built is passed through lfnBase().

    Tasks whose name is in analysisTaskTypes always get dataTier "USER" and
    forceUnmerged=True, and only an unmerged LFN base is built for them.

    parentTask: task object providing name() and getStep().
    outputModuleName: name under which the module is registered.
    primaryDataset: primary dataset name of the output.
    dataTier: data tier of the output (replaced by "USER" for analysis tasks).
    filterName: optional filter name; None or "" means "no filter".
    stepName: name of the step the module is attached to.
    forceMerged: use the merged LFN base for both bases and register the
        module with transient=False.
    forceUnmerged: use the unmerged LFN base for both bases (only consulted
        when forceMerged is false).

    Returns a dict with the keys "primaryDataset", "dataTier",
    "processedDataset" and "filterName".
    """
    haveFilterName = filterName is not None and filterName != ""
    haveProcString = self.processingString is not None and self.processingString != ""
    haveRunNumber = self.runNumber is not None and self.runNumber > 0

    # "[<processingString>-]v<version>" is both the last part of the processed
    # dataset name and the processing directory of the LFNs.
    if haveProcString:
        processingLFN = "%s-v%i" % (self.processingString, self.processingVersion)
    else:
        processingLFN = "v%i" % self.processingVersion

    processedDataset = "%s-" % self.acquisitionEra
    if haveFilterName:
        processedDataset += "%s-" % filterName
    processedDataset += processingLFN

    if haveRunNumber:
        # Run 123456 -> "000123456" -> "000/123/456"
        stringRunNumber = str(self.runNumber).zfill(9)
        runLFN = "/".join(stringRunNumber[i:i + 3] for i in range(0, 9, 3))

    if parentTask.name() in analysisTaskTypes:
        # dataTier for user data is always USER
        dataTier = "USER"

        # output for user data is always unmerged
        forceUnmerged = True

        unmergedLFN = "%s/%s" % (self.unmergedLFNBase, primaryDataset)
        if haveFilterName:
            unmergedLFN += "/%s-%s" % (self.acquisitionEra, filterName)
        else:
            unmergedLFN += "/%s" % self.acquisitionEra
        unmergedLFN += "/%s" % processingLFN

        lfnBase(unmergedLFN)

        # No separate merged base exists for user data.  Bind the name anyway
        # so that forceMerged=True does not hit an unbound local below.
        mergedLFN = unmergedLFN
    else:
        unmergedLFN = "%s/%s/%s/%s" % (self.unmergedLFNBase, self.acquisitionEra,
                                       primaryDataset, dataTier)
        mergedLFN = "%s/%s/%s/%s" % (self.mergedLFNBase, self.acquisitionEra,
                                     primaryDataset, dataTier)

        if haveFilterName:
            unmergedLFN += "/%s-%s" % (filterName, processingLFN)
            mergedLFN += "/%s-%s" % (filterName, processingLFN)
        else:
            unmergedLFN += "/%s" % processingLFN
            mergedLFN += "/%s" % processingLFN

        if haveRunNumber:
            unmergedLFN += "/%s" % runLFN
            mergedLFN += "/%s" % runLFN

        lfnBase(unmergedLFN)
        lfnBase(mergedLFN)

    isTransient = True

    if forceMerged:
        unmergedLFN = mergedLFN
        isTransient = False
    elif forceUnmerged:
        mergedLFN = unmergedLFN

    cmsswStep = parentTask.getStep(stepName)
    cmsswStepHelper = cmsswStep.getTypeHelper()
    cmsswStepHelper.addOutputModule(outputModuleName,
                                    primaryDataset=primaryDataset,
                                    processedDataset=processedDataset,
                                    dataTier=dataTier,
                                    filterName=filterName,
                                    lfnBase=unmergedLFN,
                                    mergedLFNBase=mergedLFN,
                                    transient=isTransient)

    return {"primaryDataset": primaryDataset,
            "dataTier": dataTier,
            "processedDataset": processedDataset,
            "filterName": filterName}
def updateLFNsAndDatasets(self, initialTask=None, runNumber=None):
    """
    _updateLFNsAndDatasets_

    Recompute the processed dataset name and the LFN bases of every output
    module in this task's "CMSSW" steps, then do the same for all child
    tasks.  This needs to be called after updating the acquisition era,
    processing version or merged/unmerged lfn base.

    initialTask: accepted for interface compatibility; this implementation
        does not read it.
    runNumber: when not None and greater than zero, a zero-padded
        "/NNN/NNN/NNN" suffix is appended to both LFN bases.  The same value
        is passed on to the child tasks.

    Returns None.
    """
    mergedLFNBase, unmergedLFNBase = self._getLFNBase()
    taskType = self.taskType()

    for stepName in self.listAllStepNames():
        stepHelper = self.getStepHelper(stepName)
        if stepHelper.stepType() != "CMSSW":
            continue

        for outputModuleName in stepHelper.listOutputModules():
            outputModule = stepHelper.getOutputModule(outputModuleName)
            filterName = getattr(outputModule, "filterName", None)

            # "[<processingString>-]v<version>"
            if self.getProcessingString():
                processingEra = "%s-v%i" % (self.getProcessingString(),
                                            self.getProcessingVersion())
            else:
                processingEra = "v%i" % self.getProcessingVersion()

            # The filter name, when present, prefixes the processing era both
            # in the dataset name and in the LFN directory.
            if filterName:
                processingString = "%s-%s" % (filterName, processingEra)
            else:
                processingString = processingEra
            processedDataset = "%s-%s" % (self.getAcquisitionEra(), processingString)

            lfnTemplate = "%s/%s/%s/%s/%s"
            unmergedLFN = lfnTemplate % (unmergedLFNBase,
                                         self.getAcquisitionEra(),
                                         outputModule.primaryDataset,
                                         outputModule.dataTier,
                                         processingString)
            mergedLFN = lfnTemplate % (mergedLFNBase,
                                       self.getAcquisitionEra(),
                                       outputModule.primaryDataset,
                                       outputModule.dataTier,
                                       processingString)

            if runNumber is not None and runNumber > 0:
                # Run 123456 -> "000123456" -> "/000/123/456"
                paddedRun = str(runNumber).zfill(9)
                runSuffix = "/" + "/".join((paddedRun[0:3], paddedRun[3:6], paddedRun[6:9]))
                unmergedLFN += runSuffix
                mergedLFN += runSuffix

            lfnBase(unmergedLFN)
            lfnBase(mergedLFN)
            outputModule.processedDataset = processedDataset

            # Once we change an output module we must update the subscription information
            self.updateSubscriptionDataset(outputModuleName, outputModule)

            # For merge tasks, we want all output to go to the merged LFN base.
            isMergeTask = taskType == "Merge"
            outputModule.lfnBase = mergedLFN if isMergeTask else unmergedLFN
            outputModule.mergedLFNBase = mergedLFN

            if isMergeTask and outputModule.dataTier in ["DQM", "DQMIO"]:
                datasetName = "/%s/%s/%s" % (outputModule.primaryDataset,
                                             processedDataset,
                                             outputModule.dataTier)
                self.updateDatasetName(datasetName)

    self.setTaskLogBaseLFN(unmergedLFNBase)

    # Apply the same update to every child task.
    for childTask in self.childTaskIterator():
        childTask.updateLFNsAndDatasets(runNumber=runNumber)

    return
def addOutputModule(
    self,
    parentTask,
    outputModuleName,
    primaryDataset,
    dataTier,
    filterName,
    stepName="cmsRun1",
    forceMerged=False,
    forceUnmerged=False,
):
    """
    _addOutputModule_

    Add an output module to the given processing task.

    The processed dataset name is
    "<acquisitionEra>-[<filterName>-][<processingString>-]v<processingVersion>".
    The unmerged and merged LFN bases are built from the corresponding
    self.*LFNBase, the acquisition era, the primary dataset, the data tier,
    the filter name and the processing string/version.  Every LFN base that
    is built is passed through lfnBase().

    Tasks whose name is in analysisTaskTypes always get dataTier "USER" and
    forceUnmerged=True, and only an unmerged LFN base is built for them.

    parentTask: task object providing name() and getStep().
    outputModuleName: name under which the module is registered.
    primaryDataset: primary dataset name of the output.
    dataTier: data tier of the output (replaced by "USER" for analysis tasks).
    filterName: optional filter name; None or "" means "no filter".
    stepName: name of the step the module is attached to.
    forceMerged: use the merged LFN base for both bases.
    forceUnmerged: use the unmerged LFN base for both bases (only consulted
        when forceMerged is false).

    Returns a dict with the keys "primaryDataset", "dataTier",
    "processedDataset" and "filterName".
    """
    haveFilterName = filterName is not None and filterName != ""
    haveProcString = self.processingString is not None and self.processingString != ""

    # "[<processingString>-]v<version>" is both the last part of the processed
    # dataset name and the processing directory of the LFNs.
    if haveProcString:
        processingLFN = "%s-v%i" % (self.processingString, self.processingVersion)
    else:
        processingLFN = "v%i" % self.processingVersion

    processedDataset = "%s-" % self.acquisitionEra
    if haveFilterName:
        processedDataset += "%s-" % filterName
    processedDataset += processingLFN

    if parentTask.name() in analysisTaskTypes:
        # dataTier for user data is always USER
        dataTier = "USER"

        # output for user data is always unmerged
        forceUnmerged = True

        unmergedLFN = "%s/%s" % (self.unmergedLFNBase, primaryDataset)
        if haveFilterName:
            unmergedLFN += "/%s-%s" % (self.acquisitionEra, filterName)
        else:
            unmergedLFN += "/%s" % self.acquisitionEra
        unmergedLFN += "/%s" % processingLFN

        lfnBase(unmergedLFN)

        # No separate merged base exists for user data.  Bind the name anyway
        # so that forceMerged=True does not hit an unbound local below.
        mergedLFN = unmergedLFN
    else:
        unmergedLFN = "%s/%s/%s/%s" % (self.unmergedLFNBase, self.acquisitionEra, primaryDataset, dataTier)
        mergedLFN = "%s/%s/%s/%s" % (self.mergedLFNBase, self.acquisitionEra, primaryDataset, dataTier)

        if haveFilterName:
            unmergedLFN += "/%s-%s" % (filterName, processingLFN)
            mergedLFN += "/%s-%s" % (filterName, processingLFN)
        else:
            unmergedLFN += "/%s" % processingLFN
            mergedLFN += "/%s" % processingLFN

        lfnBase(unmergedLFN)
        lfnBase(mergedLFN)

    if forceMerged:
        unmergedLFN = mergedLFN
    elif forceUnmerged:
        mergedLFN = unmergedLFN

    cmsswStep = parentTask.getStep(stepName)
    cmsswStepHelper = cmsswStep.getTypeHelper()
    cmsswStepHelper.addOutputModule(
        outputModuleName,
        primaryDataset=primaryDataset,
        processedDataset=processedDataset,
        dataTier=dataTier,
        filterName=filterName,
        lfnBase=unmergedLFN,
        mergedLFNBase=mergedLFN,
    )

    return {
        "primaryDataset": primaryDataset,
        "dataTier": dataTier,
        "processedDataset": processedDataset,
        "filterName": filterName,
    }
def updateLFNsAndDatasets(self, initialTask=None, runNumber=None):
    """
    _updateLFNsAndDatasets_

    Refresh the processed dataset name and both LFN bases on every output
    module found in this task's "CMSSW" steps, and then recurse into the
    child tasks.  This needs to be called after updating the acquisition era,
    processing version or merged/unmerged lfn base.

    initialTask: accepted for interface compatibility; this implementation
        does not read it.
    runNumber: when not None and greater than zero, a zero-padded
        "/NNN/NNN/NNN" suffix is appended to both LFN bases.  The same value
        is passed on to the child tasks.

    Returns None.
    """
    mergedLFNBase, unmergedLFNBase = self._getLFNBase()
    taskType = self.taskType()

    for stepName in self.listAllStepNames():
        stepHelper = self.getStepHelper(stepName)
        if stepHelper.stepType() != "CMSSW":
            # Only CMSSW steps carry output modules that need updating here.
            continue

        for moduleName in stepHelper.listOutputModules():
            module = stepHelper.getOutputModule(moduleName)
            filterName = getattr(module, "filterName", None)

            # "[<processingString>-]v<version>"
            if self.getProcessingString():
                processingEra = "%s-v%i" % (
                    self.getProcessingString(), self.getProcessingVersion())
            else:
                processingEra = "v%i" % self.getProcessingVersion()

            processingString = (
                "%s-%s" % (filterName, processingEra) if filterName else processingEra
            )
            processedDataset = "%s-%s" % (self.getAcquisitionEra(), processingString)

            unmergedLFN = "/".join([
                "%s" % unmergedLFNBase,
                "%s" % self.getAcquisitionEra(),
                "%s" % module.primaryDataset,
                "%s" % module.dataTier,
                "%s" % processingString,
            ])
            mergedLFN = "/".join([
                "%s" % mergedLFNBase,
                "%s" % self.getAcquisitionEra(),
                "%s" % module.primaryDataset,
                "%s" % module.dataTier,
                "%s" % processingString,
            ])

            if runNumber is not None and runNumber > 0:
                # Split the nine-digit, zero-padded run number into three
                # directory levels.
                runString = str(runNumber).zfill(9)
                lfnSuffix = "/%s/%s/%s" % tuple(
                    runString[start:start + 3] for start in (0, 3, 6))
                unmergedLFN += lfnSuffix
                mergedLFN += lfnSuffix

            lfnBase(unmergedLFN)
            lfnBase(mergedLFN)
            module.processedDataset = processedDataset

            # Once we change an output module we must update the subscription information
            self.updateSubscriptionDataset(moduleName, module)

            if taskType != "Merge":
                module.lfnBase = unmergedLFN
                module.mergedLFNBase = mergedLFN
                continue

            # For merge tasks, we want all output to go to the merged LFN base.
            module.lfnBase = mergedLFN
            module.mergedLFNBase = mergedLFN

            if module.dataTier in ["DQM", "DQMIO"]:
                datasetName = "/%s/%s/%s" % (
                    module.primaryDataset, processedDataset, module.dataTier)
                self.updateDatasetName(datasetName)

    self.setTaskLogBaseLFN(unmergedLFNBase)

    # Do the same thing for all the children.
    for task in self.childTaskIterator():
        task.updateLFNsAndDatasets(runNumber=runNumber)

    return
def addOutputModule(self, parentTask, outputModuleName, primaryDataset, dataTier, filterName,
                    stepName="cmsRun1", forceMerged=False, forceUnmerged=False):
    """
    _addOutputModule_

    Add an output module to the given processing task.

    The processed dataset name is
    "<acquisitionEra>-[<filterName>-][<processingString>-]v<processingVersion>".
    The unmerged and merged LFN bases are built from the corresponding
    self.*LFNBase, the acquisition era, the primary dataset, the data tier,
    the filter name and the processing string/version.  For non-analysis
    tasks a positive self.runNumber adds a zero-padded "NNN/NNN/NNN" suffix.
    Every LFN base that is built is passed through lfnBase().

    Tasks whose name is in analysisTaskTypes always get dataTier "USER" and
    forceUnmerged=True, and only an unmerged LFN base is built for them.

    parentTask: task object providing name() and getStep().
    outputModuleName: name under which the module is registered.
    primaryDataset: primary dataset name of the output.
    dataTier: data tier of the output (replaced by "USER" for analysis tasks).
    filterName: optional filter name; None or "" means "no filter".
    stepName: name of the step the module is attached to.
    forceMerged: use the merged LFN base for both bases.
    forceUnmerged: use the unmerged LFN base for both bases (only consulted
        when forceMerged is false).

    Returns a dict with the keys "primaryDataset", "dataTier",
    "processedDataset" and "filterName".
    """
    haveFilterName = filterName is not None and filterName != ""
    haveProcString = self.processingString is not None and self.processingString != ""
    haveRunNumber = self.runNumber is not None and self.runNumber > 0

    # "[<processingString>-]v<version>" is both the last part of the processed
    # dataset name and the processing directory of the LFNs.
    if haveProcString:
        processingLFN = "%s-v%i" % (self.processingString, self.processingVersion)
    else:
        processingLFN = "v%i" % self.processingVersion

    processedDataset = "%s-" % self.acquisitionEra
    if haveFilterName:
        processedDataset += "%s-" % filterName
    processedDataset += processingLFN

    if haveRunNumber:
        # Run 123456 -> "000123456" -> "000/123/456"
        stringRunNumber = str(self.runNumber).zfill(9)
        runLFN = "/".join(stringRunNumber[i:i + 3] for i in range(0, 9, 3))

    if parentTask.name() in analysisTaskTypes:
        # dataTier for user data is always USER
        dataTier = "USER"

        # output for user data is always unmerged
        forceUnmerged = True

        unmergedLFN = "%s/%s" % (self.unmergedLFNBase, primaryDataset)
        if haveFilterName:
            unmergedLFN += "/%s-%s" % (self.acquisitionEra, filterName)
        else:
            unmergedLFN += "/%s" % self.acquisitionEra
        unmergedLFN += "/%s" % processingLFN

        lfnBase(unmergedLFN)

        # No separate merged base exists for user data.  Bind the name anyway
        # so that forceMerged=True does not hit an unbound local below.
        mergedLFN = unmergedLFN
    else:
        unmergedLFN = "%s/%s/%s/%s" % (self.unmergedLFNBase, self.acquisitionEra,
                                       primaryDataset, dataTier)
        mergedLFN = "%s/%s/%s/%s" % (self.mergedLFNBase, self.acquisitionEra,
                                     primaryDataset, dataTier)

        if haveFilterName:
            unmergedLFN += "/%s-%s" % (filterName, processingLFN)
            mergedLFN += "/%s-%s" % (filterName, processingLFN)
        else:
            unmergedLFN += "/%s" % processingLFN
            mergedLFN += "/%s" % processingLFN

        if haveRunNumber:
            unmergedLFN += "/%s" % runLFN
            mergedLFN += "/%s" % runLFN

        lfnBase(unmergedLFN)
        lfnBase(mergedLFN)

    if forceMerged:
        unmergedLFN = mergedLFN
    elif forceUnmerged:
        mergedLFN = unmergedLFN

    cmsswStep = parentTask.getStep(stepName)
    cmsswStepHelper = cmsswStep.getTypeHelper()
    cmsswStepHelper.addOutputModule(outputModuleName,
                                    primaryDataset=primaryDataset,
                                    processedDataset=processedDataset,
                                    dataTier=dataTier,
                                    filterName=filterName,
                                    lfnBase=unmergedLFN,
                                    mergedLFNBase=mergedLFN)

    return {
        "primaryDataset": primaryDataset,
        "dataTier": dataTier,
        "processedDataset": processedDataset,
        "filterName": filterName
    }
def updateLFNsAndDatasets(self, initialTask=None):
    """
    _updateLFNsAndDatasets_

    Walk the task tree and recompute, for every output module of every
    "CMSSW" or "MulticoreCMSSW" step, the processed dataset name and the
    unmerged/merged LFN bases.  This needs to be called after updating the
    acquisition era, processing version or merged/unmerged lfn base.

    The acquisition era and processing version are read from each task; the
    LFN bases are read from self.data.properties.

    initialTask: when given, only its child tasks (and, through recursion,
        their descendants) are processed; otherwise iteration starts from
        self.taskIterator().

    Returns None.
    """
    if initialTask:
        tasks = initialTask.childTaskIterator()
    else:
        tasks = self.taskIterator()

    for task in tasks:
        taskType = task.taskType()

        for stepName in task.listAllStepNames():
            stepHelper = task.getStepHelper(stepName)
            if stepHelper.stepType() != "CMSSW" and stepHelper.stepType() != "MulticoreCMSSW":
                continue

            for moduleName in stepHelper.listOutputModules():
                module = stepHelper.getOutputModule(moduleName)
                filterName = getattr(module, "filterName", None)

                # The filter name, when present, prefixes the processing
                # version both in the dataset name and in the LFN directory.
                if filterName:
                    processingString = "%s-%s" % (filterName, task.getProcessingVersion())
                else:
                    processingString = task.getProcessingVersion()
                processedDataset = "%s-%s" % (task.getAcquisitionEra(), processingString)

                lfnTemplate = "%s/%s/%s/%s/%s"
                unmergedLFN = lfnTemplate % (self.data.properties.unmergedLFNBase,
                                             task.getAcquisitionEra(),
                                             module.primaryDataset,
                                             module.dataTier,
                                             processingString)
                mergedLFN = lfnTemplate % (self.data.properties.mergedLFNBase,
                                           task.getAcquisitionEra(),
                                           module.primaryDataset,
                                           module.dataTier,
                                           processingString)
                lfnBase(unmergedLFN)
                lfnBase(mergedLFN)
                module.processedDataset = processedDataset

                # Once we change an output module we must update the subscription information
                task.updateSubscriptionDataset(moduleName, module)

                # For merge tasks, we want all output to go to the merged LFN base.
                isMergeTask = taskType == "Merge"
                module.lfnBase = mergedLFN if isMergeTask else unmergedLFN
                module.mergedLFNBase = mergedLFN

                if isMergeTask and module.dataTier in ["DQM", "DQMROOT"]:
                    datasetName = "/%s/%s/%s" % (module.primaryDataset,
                                                 processedDataset,
                                                 module.dataTier)
                    self.updateDatasetName(task, datasetName)

        task.setTaskLogBaseLFN(self.data.properties.unmergedLFNBase)

        # Recurse so that the children of this task are updated as well.
        self.updateLFNsAndDatasets(task)

    return
def updateLFNsAndDatasets(self, initialTask=None):
    """
    _updateLFNsAndDatasets_

    Walk the task tree and recompute, for every output module of every
    "CMSSW" or "MulticoreCMSSW" step, the processed dataset name and the
    unmerged/merged LFN bases.  This needs to be called after updating the
    acquisition era, processing version or merged/unmerged lfn base.

    The acquisition era, the processing version and both LFN bases are read
    from self.data.properties.

    initialTask: when given, only its child tasks (and, through recursion,
        their descendants) are processed; otherwise iteration starts from
        self.taskIterator().

    Returns None.
    """
    tasks = initialTask.childTaskIterator() if initialTask else self.taskIterator()

    for task in tasks:
        taskType = task.taskType()

        for stepName in task.listAllStepNames():
            stepHelper = task.getStepHelper(stepName)
            if stepHelper.stepType() != "CMSSW" and stepHelper.stepType() != "MulticoreCMSSW":
                continue

            for moduleName in stepHelper.listOutputModules():
                module = stepHelper.getOutputModule(moduleName)
                filterName = getattr(module, "filterName", None)

                # The filter name, when present, prefixes the processing
                # version both in the dataset name and in the LFN directory.
                if filterName:
                    processingString = "%s-%s" % (filterName,
                                                  self.data.properties.processingVersion)
                else:
                    processingString = self.data.properties.processingVersion
                processedDataset = "%s-%s" % (self.data.properties.acquisitionEra,
                                              processingString)

                lfnTemplate = "%s/%s/%s/%s/%s"
                unmergedLFN = lfnTemplate % (self.data.properties.unmergedLFNBase,
                                             self.data.properties.acquisitionEra,
                                             module.primaryDataset,
                                             module.dataTier,
                                             processingString)
                mergedLFN = lfnTemplate % (self.data.properties.mergedLFNBase,
                                           self.data.properties.acquisitionEra,
                                           module.primaryDataset,
                                           module.dataTier,
                                           processingString)
                lfnBase(unmergedLFN)
                lfnBase(mergedLFN)
                module.processedDataset = processedDataset

                # For merge tasks, we want all output to go to the merged LFN base.
                module.lfnBase = mergedLFN if taskType == "Merge" else unmergedLFN
                module.mergedLFNBase = mergedLFN

        task.setTaskLogBaseLFN(self.data.properties.unmergedLFNBase)

        # Recurse so that the children of this task are updated as well.
        self.updateLFNsAndDatasets(task)

    return