Exemple #1
0
    def addOutputModule(self, parentTask, outputModuleName, dataTier, filterName,
                        stepName = "cmsRun1"):
        """
        _addOutputModule_

        Register an output module on the step called stepName of parentTask.

        For a task named 'Analysis' the data tier is forced to 'USER', no
        processed dataset name is set, the unmerged LFN base comes from
        self.userUnmergedLFN and no merged LFN base is provided.

        For any other task the processed dataset name is
        acquisitionEra-[filterName-]processingVersion and both LFN bases are
        base/acquisitionEra/inputPrimaryDataset/dataTier/[filterName-]processingVersion,
        each of them being passed through lfnBase().

        Returns a dictionary with the "dataTier", "processedDataset" and
        "filterName" values handed to the step helper.
        """
        if parentTask.name() != 'Analysis':
            # The filter name, when present, prefixes the processing version
            # both in the dataset name and in the last LFN component.
            if filterName is None or filterName == "":
                processingString = "%s" % self.processingVersion
            else:
                processingString = "%s-%s" % (filterName, self.processingVersion)
            processedDatasetName = "%s-%s" % (self.acquisitionEra, processingString)

            # Merged and unmerged LFNs only differ by their base.
            lfnTail = "%s/%s/%s/%s" % (self.acquisitionEra, self.inputPrimaryDataset,
                                       dataTier, processingString)
            unmergedLFN = "%s/%s" % (self.unmergedLFNBase, lfnTail)
            mergedLFN = "%s/%s" % (self.mergedLFNBase, lfnTail)
            lfnBase(unmergedLFN)
            lfnBase(mergedLFN)
        else:
            # TODO for user data a policy is still needed to define
            #  1  processedDatasetName
            #  2  primaryDatasetName
            #  ( 3  dataTier should always be 'USER'.)
            #  4  only then will we know how to deal with Merge
            dataTier = 'USER'
            processedDatasetName = None
            unmergedLFN = self.userUnmergedLFN
            mergedLFN = None

        stepHelper = parentTask.getStep(stepName).getTypeHelper()
        stepHelper.addOutputModule(outputModuleName,
                                   primaryDataset = self.inputPrimaryDataset,
                                   processedDataset = processedDatasetName,
                                   dataTier = dataTier,
                                   filterName = filterName,
                                   lfnBase = unmergedLFN,
                                   mergedLFNBase = mergedLFN)

        moduleInfo = {"dataTier": dataTier,
                      "processedDataset": processedDatasetName,
                      "filterName": filterName}
        return moduleInfo
Exemple #2
0
    def addOutputModule(self, parentTask, outputModuleName,
                        primaryDataset, dataTier, filterName,
                        stepName = "cmsRun1", forceMerged = False,
                        forceUnmerged = False):
        """
        _addOutputModule_

        Add an output module to the given processing task.

        The processed dataset name is built as
        acquisitionEra-[filterName-][processingString-]vN, where N is
        self.processingVersion.  The "processing LFN" component used below is
        [processingString-]vN.

        For tasks whose name is in analysisTaskTypes the data tier is forced
        to "USER" and the output is always unmerged: both LFN bases are
        unmergedLFNBase/primaryDataset/acquisitionEra[-filterName]/processingLFN
        and forceMerged is ignored.

        For every other task the LFN bases are
        base/acquisitionEra/primaryDataset/dataTier/[filterName-]processingLFN,
        followed, when self.runNumber is a positive number, by the first nine
        digits of the zero padded run number split as NNN/NNN/NNN.

        forceMerged makes the module write to the merged LFN base and passes
        transient=False to the step helper; otherwise forceUnmerged makes the
        merged LFN base equal to the unmerged one.  In every other case the
        module is registered with transient=True.

        Returns a dictionary with the "primaryDataset", "dataTier",
        "processedDataset" and "filterName" registered for the module.
        """
        haveFilterName = (filterName is not None and filterName != "")
        haveProcString = (self.processingString is not None and self.processingString != "")
        haveRunNumber  = (self.runNumber is not None and self.runNumber > 0)

        if haveProcString:
            processingLFN = "%s-v%i" % (self.processingString, self.processingVersion)
        else:
            processingLFN = "v%i" % self.processingVersion

        # The processed dataset ends with the same [processingString-]vN
        # component that is used in the LFNs.
        processedDataset = "%s-" % self.acquisitionEra
        if haveFilterName:
            processedDataset += "%s-" % filterName
        processedDataset += processingLFN

        if parentTask.name() in analysisTaskTypes:

            # dataTier for user data is always USER
            dataTier = "USER"

            # output for user data is always unmerged; a forceMerged request
            # cannot be honoured because no merged LFN exists for user output
            # (it used to fail with an unbound mergedLFN).
            forceUnmerged = True
            forceMerged = False

            unmergedLFN = "%s/%s" % (self.unmergedLFNBase, primaryDataset)

            if haveFilterName:
                unmergedLFN += "/%s-%s" % (self.acquisitionEra, filterName)
            else:
                unmergedLFN += "/%s" % self.acquisitionEra

            unmergedLFN += "/%s" % processingLFN

            lfnBase(unmergedLFN)

            # Keep mergedLFN bound on every path.
            mergedLFN = unmergedLFN

        else:

            unmergedLFN = "%s/%s/%s/%s" % (self.unmergedLFNBase,
                                           self.acquisitionEra,
                                           primaryDataset, dataTier)
            mergedLFN = "%s/%s/%s/%s" % (self.mergedLFNBase,
                                         self.acquisitionEra,
                                         primaryDataset, dataTier)

            if haveFilterName:
                unmergedLFN += "/%s-%s" % (filterName, processingLFN)
                mergedLFN += "/%s-%s" % (filterName, processingLFN)
            else:
                unmergedLFN += "/%s" % processingLFN
                mergedLFN += "/%s" % processingLFN

            if haveRunNumber:
                # Run 123456 becomes 000/123/456
                stringRunNumber = str(self.runNumber).zfill(9)
                runSections = [stringRunNumber[i:i+3] for i in range(0, 9, 3)]
                runLFN = "/".join(runSections)
                unmergedLFN += "/%s" % runLFN
                mergedLFN += "/%s" % runLFN

            lfnBase(unmergedLFN)
            lfnBase(mergedLFN)

        isTransient = True

        if forceMerged:
            unmergedLFN = mergedLFN
            isTransient = False
        elif forceUnmerged:
            mergedLFN = unmergedLFN

        cmsswStep = parentTask.getStep(stepName)
        cmsswStepHelper = cmsswStep.getTypeHelper()
        cmsswStepHelper.addOutputModule(outputModuleName,
                                        primaryDataset = primaryDataset,
                                        processedDataset = processedDataset,
                                        dataTier = dataTier,
                                        filterName = filterName,
                                        lfnBase = unmergedLFN,
                                        mergedLFNBase = mergedLFN,
                                        transient = isTransient)

        return {"primaryDataset": primaryDataset,
                "dataTier": dataTier,
                "processedDataset": processedDataset,
                "filterName": filterName}
Exemple #3
0
    def updateLFNsAndDatasets(self, initialTask=None, runNumber=None):
        """
        _updateLFNsAndDatasets_

        Recompute the processed dataset name and the LFN bases of every output
        module found in the CMSSW steps of this task, then do the same for all
        child tasks.  Call it after changing the acquisition era, the
        processing string/version or the merged/unmerged LFN bases.

        When runNumber is a positive number, the zero padded run number is
        appended to both LFNs as NNN/NNN/NNN.  The initialTask argument is not
        used by this method.
        """
        mergedLFNBase, unmergedLFNBase = self._getLFNBase()
        isMergeTask = self.taskType() == "Merge"

        for stepName in self.listAllStepNames():
            stepHelper = self.getStepHelper(stepName)
            if stepHelper.stepType() != "CMSSW":
                continue

            for outputModuleName in stepHelper.listOutputModules():
                outputModule = stepHelper.getOutputModule(outputModuleName)
                filterName = getattr(outputModule, "filterName", None)

                # [processingString-]vN
                procString = self.getProcessingString()
                if procString:
                    processingEra = "%s-v%i" % (procString, self.getProcessingVersion())
                else:
                    processingEra = "v%i" % self.getProcessingVersion()

                # The filter name, when set, prefixes the processing era.
                acqEra = self.getAcquisitionEra()
                processingString = "%s-%s" % (filterName, processingEra) if filterName else processingEra
                processedDataset = "%s-%s" % (acqEra, processingString)

                # Everything after the LFN base is common to both LFNs.
                lfnTail = "%s/%s/%s/%s" % (
                    acqEra,
                    outputModule.primaryDataset,
                    outputModule.dataTier,
                    processingString,
                )
                if runNumber is not None and runNumber > 0:
                    paddedRun = str(runNumber).zfill(9)
                    lfnTail += "/%s/%s/%s" % (paddedRun[0:3], paddedRun[3:6], paddedRun[6:9])

                unmergedLFN = "%s/%s" % (unmergedLFNBase, lfnTail)
                mergedLFN = "%s/%s" % (mergedLFNBase, lfnTail)
                lfnBase(unmergedLFN)
                lfnBase(mergedLFN)

                outputModule.processedDataset = processedDataset

                # The subscription information depends on the output module,
                # so it has to follow every change made to it.
                self.updateSubscriptionDataset(outputModuleName, outputModule)

                # Merge tasks send all their output to the merged LFN base.
                outputModule.lfnBase = mergedLFN if isMergeTask else unmergedLFN
                outputModule.mergedLFNBase = mergedLFN

                if isMergeTask and outputModule.dataTier in ("DQM", "DQMIO"):
                    datasetName = "/%s/%s/%s" % (
                        outputModule.primaryDataset,
                        processedDataset,
                        outputModule.dataTier,
                    )
                    self.updateDatasetName(datasetName)

        self.setTaskLogBaseLFN(unmergedLFNBase)

        # Propagate the update down the task tree.
        for childTask in self.childTaskIterator():
            childTask.updateLFNsAndDatasets(runNumber=runNumber)

        return
Exemple #4
0
    def addOutputModule(
        self,
        parentTask,
        outputModuleName,
        primaryDataset,
        dataTier,
        filterName,
        stepName="cmsRun1",
        forceMerged=False,
        forceUnmerged=False,
    ):
        """
        _addOutputModule_

        Add an output module to the given processing task.

        The processed dataset name is built as
        acquisitionEra-[filterName-][processingString-]vN, where N is
        self.processingVersion.  The "processing LFN" component used below is
        [processingString-]vN.

        For tasks whose name is in analysisTaskTypes the data tier is forced
        to "USER" and the output is always unmerged: both LFN bases are
        unmergedLFNBase/primaryDataset/acquisitionEra[-filterName]/processingLFN
        and forceMerged is ignored.

        For every other task the LFN bases are
        base/acquisitionEra/primaryDataset/dataTier/[filterName-]processingLFN.
        forceMerged makes the module write to the merged LFN base; otherwise
        forceUnmerged makes the merged LFN base equal to the unmerged one.

        Returns a dictionary with the "primaryDataset", "dataTier",
        "processedDataset" and "filterName" registered for the module.
        """
        haveFilterName = filterName is not None and filterName != ""
        haveProcString = self.processingString is not None and self.processingString != ""

        if haveProcString:
            processingLFN = "%s-v%i" % (self.processingString, self.processingVersion)
        else:
            processingLFN = "v%i" % self.processingVersion

        # The processed dataset ends with the same [processingString-]vN
        # component that is used in the LFNs.
        processedDataset = "%s-" % self.acquisitionEra
        if haveFilterName:
            processedDataset += "%s-" % filterName
        processedDataset += processingLFN

        if parentTask.name() in analysisTaskTypes:

            # dataTier for user data is always USER
            dataTier = "USER"

            # output for user data is always unmerged; a forceMerged request
            # cannot be honoured because no merged LFN exists for user output
            # (it used to fail with an unbound mergedLFN).
            forceUnmerged = True
            forceMerged = False

            unmergedLFN = "%s/%s" % (self.unmergedLFNBase, primaryDataset)

            if haveFilterName:
                unmergedLFN += "/%s-%s" % (self.acquisitionEra, filterName)
            else:
                unmergedLFN += "/%s" % self.acquisitionEra

            unmergedLFN += "/%s" % processingLFN

            lfnBase(unmergedLFN)

            # Keep mergedLFN bound on every path.
            mergedLFN = unmergedLFN

        else:

            unmergedLFN = "%s/%s/%s/%s" % (self.unmergedLFNBase, self.acquisitionEra, primaryDataset, dataTier)
            mergedLFN = "%s/%s/%s/%s" % (self.mergedLFNBase, self.acquisitionEra, primaryDataset, dataTier)

            if haveFilterName:
                unmergedLFN += "/%s-%s" % (filterName, processingLFN)
                mergedLFN += "/%s-%s" % (filterName, processingLFN)
            else:
                unmergedLFN += "/%s" % processingLFN
                mergedLFN += "/%s" % processingLFN

            lfnBase(unmergedLFN)
            lfnBase(mergedLFN)

        if forceMerged:
            unmergedLFN = mergedLFN
        elif forceUnmerged:
            mergedLFN = unmergedLFN

        cmsswStep = parentTask.getStep(stepName)
        cmsswStepHelper = cmsswStep.getTypeHelper()
        cmsswStepHelper.addOutputModule(
            outputModuleName,
            primaryDataset=primaryDataset,
            processedDataset=processedDataset,
            dataTier=dataTier,
            filterName=filterName,
            lfnBase=unmergedLFN,
            mergedLFNBase=mergedLFN,
        )

        return {
            "primaryDataset": primaryDataset,
            "dataTier": dataTier,
            "processedDataset": processedDataset,
            "filterName": filterName,
        }
Exemple #5
0
    def updateLFNsAndDatasets(self, initialTask=None, runNumber=None):
        """
        _updateLFNsAndDatasets_

        Refresh the processed dataset name and the merged/unmerged LFN bases
        of all output modules in the CMSSW steps of this task and, afterwards,
        of all its child tasks.  It has to be called once the acquisition
        era, the processing string/version or the LFN bases were modified.

        A positive runNumber adds the zero padded run number, split as
        NNN/NNN/NNN, at the end of both LFNs.  The initialTask argument is not
        used by this method.
        """
        mergedLFNBase, unmergedLFNBase = self._getLFNBase()
        mergeTask = self.taskType() == "Merge"

        for stepName in self.listAllStepNames():
            helper = self.getStepHelper(stepName)

            if helper.stepType() == "CMSSW":
                for moduleName in helper.listOutputModules():
                    module = helper.getOutputModule(moduleName)
                    filterName = getattr(module, "filterName", None)

                    # Processing era: [processingString-]vN
                    procString = self.getProcessingString()
                    version = self.getProcessingVersion()
                    processingEra = "v%i" % version
                    if procString:
                        processingEra = "%s-%s" % (procString, processingEra)

                    # Last LFN component: [filterName-]processingEra
                    processingString = processingEra
                    if filterName:
                        processingString = "%s-%s" % (filterName,
                                                      processingEra)

                    era = self.getAcquisitionEra()
                    processedDataset = "%s-%s" % (era, processingString)

                    lfnFields = (era, module.primaryDataset, module.dataTier,
                                 processingString)
                    unmergedLFN = "%s/%s/%s/%s/%s" % (
                        (unmergedLFNBase, ) + lfnFields)
                    mergedLFN = "%s/%s/%s/%s/%s" % (
                        (mergedLFNBase, ) + lfnFields)

                    if runNumber is not None and runNumber > 0:
                        digits = str(runNumber).zfill(9)
                        runSuffix = "".join("/" + digits[start:start + 3]
                                            for start in (0, 3, 6))
                        unmergedLFN += runSuffix
                        mergedLFN += runSuffix

                    lfnBase(unmergedLFN)
                    lfnBase(mergedLFN)
                    module.processedDataset = processedDataset

                    # A modified output module requires refreshed
                    # subscription information.
                    self.updateSubscriptionDataset(moduleName, module)

                    # Merge tasks write everything below the merged LFN base.
                    if mergeTask:
                        module.lfnBase = mergedLFN
                    else:
                        module.lfnBase = unmergedLFN
                    module.mergedLFNBase = mergedLFN

                    if mergeTask and module.dataTier in ("DQM", "DQMIO"):
                        self.updateDatasetName(
                            "/%s/%s/%s" % (module.primaryDataset,
                                           processedDataset, module.dataTier))

        self.setTaskLogBaseLFN(unmergedLFNBase)

        # Apply the same update to every child task.
        for child in self.childTaskIterator():
            child.updateLFNsAndDatasets(runNumber=runNumber)

        return
Exemple #6
0
    def addOutputModule(self,
                        parentTask,
                        outputModuleName,
                        primaryDataset,
                        dataTier,
                        filterName,
                        stepName="cmsRun1",
                        forceMerged=False,
                        forceUnmerged=False):
        """
        _addOutputModule_

        Add an output module to the given processing task.

        The processed dataset name is built as
        acquisitionEra-[filterName-][processingString-]vN, where N is
        self.processingVersion.  The "processing LFN" component used below is
        [processingString-]vN.

        For tasks whose name is in analysisTaskTypes the data tier is forced
        to "USER" and the output is always unmerged: both LFN bases are
        unmergedLFNBase/primaryDataset/acquisitionEra[-filterName]/processingLFN
        and forceMerged is ignored.

        For every other task the LFN bases are
        base/acquisitionEra/primaryDataset/dataTier/[filterName-]processingLFN,
        followed, when self.runNumber is a positive number, by the first nine
        digits of the zero padded run number split as NNN/NNN/NNN.
        forceMerged makes the module write to the merged LFN base; otherwise
        forceUnmerged makes the merged LFN base equal to the unmerged one.

        Returns a dictionary with the "primaryDataset", "dataTier",
        "processedDataset" and "filterName" registered for the module.
        """
        haveFilterName = (filterName is not None and filterName != "")
        haveProcString = (self.processingString is not None
                          and self.processingString != "")
        haveRunNumber = (self.runNumber is not None and self.runNumber > 0)

        if haveProcString:
            processingLFN = "%s-v%i" % (self.processingString,
                                        self.processingVersion)
        else:
            processingLFN = "v%i" % self.processingVersion

        # The processed dataset ends with the same [processingString-]vN
        # component that is used in the LFNs.
        processedDataset = "%s-" % self.acquisitionEra
        if haveFilterName:
            processedDataset += "%s-" % filterName
        processedDataset += processingLFN

        if parentTask.name() in analysisTaskTypes:

            # dataTier for user data is always USER
            dataTier = "USER"

            # output for user data is always unmerged; a forceMerged request
            # cannot be honoured because no merged LFN exists for user output
            # (it used to fail with an unbound mergedLFN).
            forceUnmerged = True
            forceMerged = False

            unmergedLFN = "%s/%s" % (self.unmergedLFNBase, primaryDataset)

            if haveFilterName:
                unmergedLFN += "/%s-%s" % (self.acquisitionEra, filterName)
            else:
                unmergedLFN += "/%s" % self.acquisitionEra

            unmergedLFN += "/%s" % processingLFN

            lfnBase(unmergedLFN)

            # Keep mergedLFN bound on every path.
            mergedLFN = unmergedLFN

        else:

            unmergedLFN = "%s/%s/%s/%s" % (self.unmergedLFNBase,
                                           self.acquisitionEra, primaryDataset,
                                           dataTier)
            mergedLFN = "%s/%s/%s/%s" % (self.mergedLFNBase,
                                         self.acquisitionEra, primaryDataset,
                                         dataTier)

            if haveFilterName:
                unmergedLFN += "/%s-%s" % (filterName, processingLFN)
                mergedLFN += "/%s-%s" % (filterName, processingLFN)
            else:
                unmergedLFN += "/%s" % processingLFN
                mergedLFN += "/%s" % processingLFN

            if haveRunNumber:
                # Run 123456 becomes 000/123/456
                stringRunNumber = str(self.runNumber).zfill(9)
                runSections = [
                    stringRunNumber[i:i + 3] for i in range(0, 9, 3)
                ]
                runLFN = "/".join(runSections)
                unmergedLFN += "/%s" % runLFN
                mergedLFN += "/%s" % runLFN

            lfnBase(unmergedLFN)
            lfnBase(mergedLFN)

        if forceMerged:
            unmergedLFN = mergedLFN
        elif forceUnmerged:
            mergedLFN = unmergedLFN

        cmsswStep = parentTask.getStep(stepName)
        cmsswStepHelper = cmsswStep.getTypeHelper()
        cmsswStepHelper.addOutputModule(outputModuleName,
                                        primaryDataset=primaryDataset,
                                        processedDataset=processedDataset,
                                        dataTier=dataTier,
                                        filterName=filterName,
                                        lfnBase=unmergedLFN,
                                        mergedLFNBase=mergedLFN)

        return {
            "primaryDataset": primaryDataset,
            "dataTier": dataTier,
            "processedDataset": processedDataset,
            "filterName": filterName
        }
Exemple #7
0
    def updateLFNsAndDatasets(self, initialTask = None):
        """
        _updateLFNsAndDatasets_

        Refresh the processed dataset name and the LFN bases of every output
        module in the CMSSW and MulticoreCMSSW steps of the workflow tasks.
        It has to be called after the acquisition era, the processing version
        or the merged/unmerged LFN bases were changed.

        Without initialTask the tasks returned by self.taskIterator() are
        processed; with an initialTask its child tasks are processed instead.
        The method calls itself on every task it handles, so the whole tree
        below is covered.
        """
        tasks = initialTask.childTaskIterator() if initialTask else self.taskIterator()

        for task in tasks:
            isMergeTask = task.taskType() == "Merge"

            for stepName in task.listAllStepNames():
                stepHelper = task.getStepHelper(stepName)
                if stepHelper.stepType() not in ("CMSSW", "MulticoreCMSSW"):
                    continue

                for outputModuleName in stepHelper.listOutputModules():
                    outputModule = stepHelper.getOutputModule(outputModuleName)
                    filterName = getattr(outputModule, "filterName", None)

                    acqEra = task.getAcquisitionEra()
                    procVersion = task.getProcessingVersion()

                    # The filter name, when set, prefixes the processing version.
                    if filterName:
                        processingString = "%s-%s" % (filterName, procVersion)
                    else:
                        processingString = procVersion
                    processedDataset = "%s-%s" % (acqEra, processingString)

                    # Everything after the LFN base is common to both LFNs.
                    lfnTail = "%s/%s/%s/%s" % (acqEra,
                                               outputModule.primaryDataset,
                                               outputModule.dataTier,
                                               processingString)
                    unmergedLFN = "%s/%s" % (self.data.properties.unmergedLFNBase, lfnTail)
                    mergedLFN = "%s/%s" % (self.data.properties.mergedLFNBase, lfnTail)
                    lfnBase(unmergedLFN)
                    lfnBase(mergedLFN)

                    outputModule.processedDataset = processedDataset

                    # The subscription information has to follow any change
                    # made to an output module.
                    task.updateSubscriptionDataset(outputModuleName, outputModule)

                    # Merge tasks send all their output to the merged LFN base.
                    outputModule.lfnBase = mergedLFN if isMergeTask else unmergedLFN
                    outputModule.mergedLFNBase = mergedLFN

                    if isMergeTask and outputModule.dataTier in ("DQM", "DQMROOT"):
                        datasetName = "/%s/%s/%s" % (outputModule.primaryDataset,
                                                     processedDataset,
                                                     outputModule.dataTier)
                        self.updateDatasetName(task, datasetName)

            task.setTaskLogBaseLFN(self.data.properties.unmergedLFNBase)

            # Descend into the children of this task.
            self.updateLFNsAndDatasets(task)

        return
Exemple #8
0
    def updateLFNsAndDatasets(self, initialTask=None):
        """
        _updateLFNsAndDatasets_

        Recompute the processed dataset name and the LFN bases of all output
        modules in the CMSSW and MulticoreCMSSW steps of the workflow tasks,
        using the acquisition era, processing version and LFN bases stored in
        self.data.properties.  Call it whenever one of those values changed.

        Without initialTask the tasks returned by self.taskIterator() are
        processed; with an initialTask its child tasks are processed instead.
        The method calls itself on every task it handles, so the whole tree
        below is covered.
        """
        if initialTask:
            tasks = initialTask.childTaskIterator()
        else:
            tasks = self.taskIterator()

        for task in tasks:
            isMergeTask = task.taskType() == "Merge"

            for stepName in task.listAllStepNames():
                stepHelper = task.getStepHelper(stepName)
                if stepHelper.stepType() not in ("CMSSW", "MulticoreCMSSW"):
                    continue

                for outputModuleName in stepHelper.listOutputModules():
                    outputModule = stepHelper.getOutputModule(outputModuleName)
                    filterName = getattr(outputModule, "filterName", None)

                    acqEra = self.data.properties.acquisitionEra
                    procVersion = self.data.properties.processingVersion

                    # The filter name, when set, prefixes the processing version.
                    processingString = "%s-%s" % (filterName, procVersion) if filterName else procVersion
                    processedDataset = "%s-%s" % (acqEra, processingString)

                    # Everything after the LFN base is common to both LFNs.
                    lfnTail = "%s/%s/%s/%s" % (
                        acqEra,
                        outputModule.primaryDataset,
                        outputModule.dataTier,
                        processingString,
                    )
                    unmergedLFN = "%s/%s" % (self.data.properties.unmergedLFNBase, lfnTail)
                    mergedLFN = "%s/%s" % (self.data.properties.mergedLFNBase, lfnTail)
                    lfnBase(unmergedLFN)
                    lfnBase(mergedLFN)

                    outputModule.processedDataset = processedDataset

                    # Merge tasks send all their output to the merged LFN base.
                    outputModule.lfnBase = mergedLFN if isMergeTask else unmergedLFN
                    outputModule.mergedLFNBase = mergedLFN

            task.setTaskLogBaseLFN(self.data.properties.unmergedLFNBase)

            # Descend into the children of this task.
            self.updateLFNsAndDatasets(task)

        return