Code Example #1
File: CleanUpTools.py Project: PerilousApricot/CRAB2
def createCleanupWorkflowSpec():
    """
    _createCleanupWorkflowSpec_

    Create a generic cleanup WorkflowSpec definition
    that can be used to generate a sandbox for cleanup jobs

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")
    workflow = WorkflowSpec()
    workflow.setWorkflowName("CleanUp-%s" % timestamp)
    workflow.setActivity("CleanUp")
    workflow.setRequestCategory("mc-cleanup")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "CleanUp"

    cleanUp = workflow.payload
    cleanUp.name = "cleanUp1"
    cleanUp.type = "CleanUp" 
    
    cleanUp.application["Project"] = ""
    cleanUp.application["Version"] = ""
    cleanUp.application["Architecture"] = ""
    cleanUp.application["Executable"] = "RuntimeCleanUp.py" # binary name
    cleanUp.configuration = ""
    cleanUp.cfgInterface = None

    return workflow
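
The name construction above is easy to exercise on its own. A minimal standalone sketch (stdlib only; the printed value is illustrative):

import time

# time.asctime() gives e.g. "Mon Jan  5 12:30:00 2009"; the spaces and
# colons are replaced so the result is safe to embed in workflow names
# and file paths.
timestamp = str(time.asctime(time.localtime(time.time())))
timestamp = timestamp.replace(" ", "-").replace(":", "_")
print("CleanUp-%s" % timestamp)  # e.g. CleanUp-Mon-Jan--5-12_30_00-2009
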
Code Example #2
def createProductionWorkflow(prodName,
                             cmsswVersion,
                             cfgFile=None,
                             category="mc",
                             **args):
    """
    _createProductionWorkflow_

    Create a Production style workflow, i.e. generation of new events

    """

    timestamp = int(time.time())
    if args.get("PyCfg", None) is None:
        if cfgFile is None:
            msg = "Error: No Cfg File or python cfg file provided to createProductionWorkflow"
            raise RuntimeError(msg)
        pycfgFile = createPythonConfig(cfgFile)
        pycfgFileContent = file(pycfgFile).read()
    else:
        pycfgFileContent = args['PyCfg']

    if args.get("PSetHash", None) is None:
        realPSetHash = createPSetHash(cfgFile)
    else:
        realPSetHash = args['PSetHash']

    #  //
    # // Create a new WorkflowSpec and set its name
    #//
    spec = WorkflowSpec()
    workflowname = "%s__%s-%s-%s-%s" % (
        prodName, cmsswVersion, args.get("processingLabel", "Test07"),
        args.get("physicsGroup", "NoPhysicsGroup"), timestamp)
    spec.setWorkflowName(workflowname)
    spec.setRequestCategory(category)
    spec.setRequestTimestamp(timestamp)

    cmsRun = spec.payload
    populateCMSRunNode(cmsRun,
                       "cmsRun1",
                       cmsswVersion,
                       pycfgFileContent,
                       realPSetHash,
                       timestamp,
                       prodName,
                       physicsGroup=args.get("physicsGroup", "NoPhysicsGroup"),
                       processingLabel=args.get("processingLabel", "Test07"),
                       fakeHash=args.get("FakeHash", False))

    addStageOutNode(cmsRun, "stageOut1")
    generateFilenames(spec)
    return spec
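
The naming logic can be checked in isolation. A small sketch of how the **args defaults feed into the workflow name; make_workflow_name is a hypothetical helper introduced here only for illustration:

import time

def make_workflow_name(prodName, cmsswVersion, **args):
    # Mirrors the naming logic of createProductionWorkflow: keyword
    # arguments that are not supplied fall back to the same defaults.
    return "%s__%s-%s-%s-%s" % (
        prodName, cmsswVersion,
        args.get("processingLabel", "Test07"),
        args.get("physicsGroup", "NoPhysicsGroup"),
        int(time.time()))

print(make_workflow_name("MinBias", "CMSSW_2_0_0"))
# MinBias__CMSSW_2_0_0-Test07-NoPhysicsGroup-<timestamp>
print(make_workflow_name("MinBias", "CMSSW_2_0_0", physicsGroup="Tracker"))
# MinBias__CMSSW_2_0_0-Test07-Tracker-<timestamp>
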
Code Example #3
def createLogCollectorWorkflowSpec(wf):
    """
    _createLogCollectorWorkflowSpec_

    Create a generic LogArchive WorkflowSpec definition

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")
    workflow = WorkflowSpec()
    workflow.setWorkflowName("LogCollect-%s" % timestamp)
    workflow.setActivity("LogCollect")
    workflow.setRequestCategory("logcollect")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "LogCollect"

    logArchive = workflow.payload
    logArchive.name = "logCollect1"
    logArchive.type = "LogCollect"
    # TODO: remove this?
    # logArchive.workflow = wf
    logArchive.application["Project"] = ""
    logArchive.application["Version"] = ""
    logArchive.application["Architecture"] = ""
    logArchive.application["Executable"] = "RuntimeLogCollector.py"  # binary name
    logArchive.configuration = ""
    logArchive.cfgInterface = None

    # set stageOut override
    # cfg = IMProvNode("config")
    # stageOut = IMProvNode("StageOutParameters")
    # cfg.addNode()
    # WorkflowTools.addStageOutNode(logArchive, "StageOut1")
    # WorkflowTools.addStageOutOverride(logArchive, stageOutParams['command'],
    #                                  stageOutParams['option'],
    #                                  stageOutParams['se-name'],
    #                                  stageOutParams['lfnPrefix'])

    return workflow
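
A usage sketch for this factory and the CleanUp one from Example #1, assuming both are importable from the surrounding module. Since the logArchive.workflow assignment is commented out, the wf argument is currently unused and any placeholder works:

cleanup = createCleanupWorkflowSpec()
logcollect = createLogCollectorWorkflowSpec(None)  # wf is currently unused

print(cleanup.parameters['WorkflowType'])     # "CleanUp"
print(logcollect.parameters['WorkflowType'])  # "LogCollect"
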
Code Example #4
class WorkflowMaker:
    """
    _WorkflowMaker_

    Basic MC workflow maker for PR to use to create workflow spec files.
    
    """
    def __init__(self, requestId, channel, label):
        self.requestId = requestId
        self.group = None
        self.label = label
        self.timestamp = int(time.time())
        self.channel = channel
        self.cmsswVersions = []
        self.configurations = []
        self.psetHashes = {}
        self.origCfgs = {}
        self.acquisitionEra = None
        self.processingString = None
        self.processingVersion = None
        self.conditions = None

        # turn on use of proper naming convention for datasets
        # should be made the default soon, let's deprecate all the old crap
        self.useProperNamingConventions = False

        self.options = {}
        self.options.setdefault('FakeHash', False)

        # Should we use another attribute for setting the output dataset
        # status in DBS?
        self.outputDatasetStatus = 'VALID'

        self.inputDataset = {}
        self.inputDataset['IsUsed'] = False
        self.inputDataset['DatasetName'] = None
        self.inputDataset['Primary'] = None
        self.inputDataset['Processed'] = None
        self.inputDataset['DataTier'] = None
        #  //
        # // Extra controls over input dataset if required
        #//
        self.inputDataset['SplitType'] = None
        self.inputDataset['SplitSize'] = None
        self.inputDataset['OnlySites'] = None
        self.inputDataset['OnlyBlocks'] = None
        self.inputDataset['OnlyClosedBlocks'] = True

        #  //
        # // Pileup Dataset controls
        #//
        self.pileupDatasets = []

        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflowName = "%s-%s-%s" % (label, channel, requestId)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("mc")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters['RequestLabel'] = self.label
        self.workflow.parameters['ProdRequestID'] = self.requestId

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"

        self.cmsRunNodes = [self.cmsRunNode]
        self.saveOutputFor = []

    def chainCmsRunNode(self, stageOutIntermediates=False, *outputModules):
        """
        append a cmsRun config to the current cmsRun node and chain them
        """
        if stageOutIntermediates:  #Do we want to keep cmsRunNode's products?
            self.saveOutputFor.append(self.cmsRunNode.name)
        newnode = self.cmsRunNode.newNode("cmsRun%s" %
                                          (len(self.cmsRunNodes) + 1))
        newnode.type = "CMSSW"
        if not outputModules:
            outputModules = self.configurations[-1].outputModules.keys()
        for outmodule in outputModules:
            newnode.addInputLink(self.cmsRunNode.name,
                                 outmodule,
                                 'source',
                                 AppearStandalone=not stageOutIntermediates)
        self.cmsRunNode = newnode
        self.cmsRunNodes.append(newnode)

    def changeCategory(self, newCategory):
        """
        _changeCategory_

        Change the workflow category from the default mc
        that appears in the LFNs

        """
        self.workflow.setRequestCategory(newCategory)
        return

    def setAcquisitionEra(self, era):
        """
        _setAcquisitionEra_
        
        Sets the AcquisitionEra in the workflow 

        """
        self.workflow.setAcquisitionEra(era)
        self.acquisitionEra = era
        return

    def setNamingConventionParameters(self, era, procString, procVers):
        """
        _setNamingConventionParameters_

        Sets AcquisitionEra, ProcessingString and ProcessingVersion

        """
        self.workflow.setAcquisitionEra(era)
        self.workflow.parameters['ProcessingString'] = procString
        self.workflow.parameters['ProcessingVersion'] = procVers

        self.acquisitionEra = era
        self.processingString = procString
        self.processingVersion = procVers

        self.useProperNamingConventions = True

        return

    def setActivity(self, activity):
        """
        _setActivity_
        
        Set the workflow type
        i.e. Simulation, Reconstruction, Reprocessing, Skimming
        """
        self.workflow.setActivity(activity)
        return

    def setCMSSWVersion(self, version):
        """
        _setCMSSWVersion_

        Set the version of CMSSW to be used

        """
        self.cmsswVersions.append(version)
        self.cmsRunNode.application['Version'] = version
        self.cmsRunNode.application['Executable'] = "cmsRun"
        self.cmsRunNode.application['Project'] = "CMSSW"
        self.cmsRunNode.application['Architecture'] = ""
        return

    def setUserSandbox(self, sandboxloc):
        """
        _setUserSandbox_
        Sets the location of the user sandbox

        """
        self.cmsRunNode.userSandbox = sandboxloc
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Physics Group owning the workflow

        """
        self.group = group
        self.workflow.parameters['PhysicsGroup'] = self.group
        return

    def setConfiguration(self, cfgFile, **args):
        """
        _setConfiguration_

        Provide the CMSSW configuration to be used.
        By default, assume that cfgFile is a python format string.

        The format & type can be specified using args:

        - Type   : must be "file" or "string" or "instance"
        
        """
        cfgType = args.get("Type", "instance")

        if cfgType not in ("file", "string", "instance"):
            msg = "Illegal Type for cfg file: %s\n" % cfgType
            msg += "Should be \"file\", \"string\" or \"instance\"\n"
            raise RuntimeError(msg)

        cfgContent = cfgFile
        if cfgType == "file":
            cfgContent = file(cfgFile).read()
            cfgType = "string"

        if cfgType == "string":
            cfgData = cfgContent
            cfgContent = CMSSWConfig()
            cfgContent.unpack(cfgData)

        self.cmsRunNode.cfgInterface = cfgContent
        self.configurations.append(cfgContent)
        return

    def setOriginalCfg(self, honkingGreatString):
        """
        _setOriginalCfg_

        Set the original cfg file content that is to be recorded in DBS

        CALL THIS METHOD AFTER setConfiguration
        
        """
        sep = '\n\n### Next chained config file ###\n\n'
        cfg = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_config = self.origCfgs.get(link['InputNode'], '')
                if prev_config:
                    cfg = '%s%s%s' % (cfg, prev_config, sep)
        cfg = '%s%s' % (cfg, honkingGreatString)
        self.cmsRunNode.cfgInterface.originalCfg = cfg
        self.origCfgs[self.cmsRunNode.name] = cfg
        return

    def setPSetHash(self, hashValue):
        """
        _setPSetHash_

        Set the value for the PSetHash
        
        If any InputLinks are present their pset hashes are prepended

        """
        hash = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_node_hash = self.psetHashes.get(link['InputNode'], None)
                if prev_node_hash:  # cmsGen nodes will be missing
                    hash = '%s%s_' % (hash, prev_node_hash)
        hash = '%s%s' % (hash, hashValue)
        self.psetHashes[self.cmsRunNode.name] = hash
        return

    def addInputDataset(self, datasetPath):
        """
        _addInputDataset_

        If this workflow processes a dataset, set that here

        NOTE: It is possible to also specify
            - Split Type (file or event)
            - Split Size (int)
            - input DBS
        Not sure how many of these we want to use.
        For now, they can be added to the inputDataset dictionary
        """
        datasetBits = DatasetConventions.parseDatasetPath(datasetPath)
        self.inputDataset.update(datasetBits)
        self.inputDataset['IsUsed'] = True
        self.inputDataset['DatasetName'] = datasetPath

        return

    def addPileupDataset(self, datasetName, filesPerJob=10, targetModule=None):
        """
        _addPileupDataset_

        Add a dataset to provide pileup overlap.
        filesPerJob should be 1 in 99.9 % of cases

        """
        pileupDataset = {}
        pileupDataset['Primary'] = None
        pileupDataset['Processed'] = None
        pileupDataset['DataTier'] = None
        datasetBits = DatasetConventions.parseDatasetPath(datasetName)
        pileupDataset.update(datasetBits)
        pileupDataset['FilesPerJob'] = filesPerJob
        # Target module could be 'MixingModule' or 'DataMixingModule' for
        # the moment. If None, MixingModule will be used.
        pileupDataset['TargetModule'] = targetModule
        self.pileupDatasets.append(pileupDataset)
        return

    def addFinalDestination(self, *phedexNodeNames):
        """
        _addFinalDestination_

        Add a final destination that can be used to generate
        a PhEDEx subscription so that the data gets transferred to
        some final location.

        NOTE: Do we want to support a list of PhEDEx nodes? Eg CERN + FNAL

        """
        nameList = ""
        for nodeName in phedexNodeNames:
            nameList += "%s," % nodeName
        nameList = nameList[:-1]
        self.workflow.parameters['PhEDExDestination'] = nameList
        return

    def addSelectionEfficiency(self, selectionEff):
        """
        _addSelectionEfficiency_

        Do we have a selection efficiency?

        """

        self.cmsRunNode.applicationControls["SelectionEfficiency"] = \
                                                             selectionEff
        return

    def setOutputDatasetDbsStatus(self, status):
        """
        _setOutputDatasetDbsStatus_

        The output datasets will have this status in the field dataset.status.
        This value will be used when registering the output dataset in DBS.

        Only two values are accepted:
            - VALID
            - PRODUCTION

        """

        if status in ('VALID', 'PRODUCTION'):
            self.outputDatasetStatus = status

        return

    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()

        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1",
                                      *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'], self.inputDataset['Processed'])
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                    'SplitType',
                    'SplitSize',
                    'OnlySites',
                    'OnlyBlocks',
                    'OnlyClosedBlocks',
            ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = self.inputDataset[
                        keyname]

        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'], pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']

        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                                            cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                                            self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down the
                        # dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use its parent.
                        else:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue

            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                    PhysicsChannel=self.channel, )

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join(
                            (self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        ProcessingString=processingString,
                        ProcessingVersion=self.processingVersion,
                        Unmerged=True)
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version=cmsRunNode.application['Version'],
                        Label=self.label,
                        Group=self.group,
                        FilterName=filterName,
                        RequestId=self.requestId,
                        Unmerged=True)
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        Conditions=self.workflow.parameters['Conditions'],
                        ProcessingVersion=self.workflow.
                        parameters['ProcessingVersion'],
                        FilterName=filterName,
                        Unmerged=True)

                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName

                outDS = cmsRunNode.addOutputDataset(primaryName, processedName,
                                                    outModName)

                outDS['Status'] = self.outputDatasetStatus
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                                         cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group

                # check for input dataset for first node
                if self.inputDataset[
                        'IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            outDS['ParentDataset'] = datasets[
                                '%s:%s' % (inputLink['InputNode'],
                                           inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                    datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                            (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                                "/%s/%s/%s" % ( outDS['PrimaryDataset'],
                                                  outDS['ProcessedDataset'],
                                                  outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (i.e. HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow

    def _Validate(self):
        """
        _Validate_

        Private method to test all options are set.

        Throws a WorkflowMakerError if any problems found

        """
        notNoneAttrs = [
            "requestId",
            "label",
            "group",
            "channel",
        ]
        for attrName in notNoneAttrs:
            value = getattr(self, attrName, None)
            if value == None:
                msg = "Attribute Not Set: %s" % attrName
                raise WorkflowMakerError(msg)

        if not len(self.configurations):
            msg = "Attribute Not Set: configurations"
            raise WorkflowMakerError(msg)

        if len(self.configurations) != len(self.cmsswVersions):
            msg = "len(self.configurations) != len(self.cmsswVersions)"
            raise WorkflowMakerError(msg)

        return
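
A minimal usage sketch for WorkflowMaker, assuming the ProdCommon/ProdAgent imports the class relies on are available; cfgString stands for a packed CMSSWConfig string and the other values are illustrative:

maker = WorkflowMaker(requestId="100001", channel="MinBias", label="CSA08")
maker.setPhysicsGroup("Tracker")                  # group is required by _Validate
maker.setCMSSWVersion("CMSSW_2_0_0")              # one version per configuration
maker.setConfiguration(cfgString, Type="string")  # cfgString: a packed CMSSWConfig
maker.setPSetHash("0123456789abcdef")
spec = maker.makeWorkflow()                       # returns the WorkflowSpec
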
Code Example #5
class PromptRecoWorkflow(FactoryInterface):
    """
    _PromptRecoWorkflow_

    Factory to build workflows for PromptReco jobs.
    """
    def __init__(self, runNumber, version, cmsPath, scramArch):
        FactoryInterface.__init__(self, version, cmsPath, scramArch)
        self.run = runNumber
        self.workflow = None
        self.timestamp = None
        self.cmsRunNode = None
        self.workflowName = None
        self.configFile = None
        self.useLazyDownload = False
        self.primaryDataset = None
        self.processedDataset = None
        self.parentProcessedDataset = None
        self.acquisitionEra = None
        self.processingVersion = None

    def setConfigFile(self, configFile):
        """
        _setConfigFile_

        Set the config file that will be loaded into the workflow.
        """
        self.configFile = configFile

    def setPrimaryDataset(self, primaryDatasetName):
        """
        _setPrimaryDataset_

        Set the primary dataset that this workflow will run over.
        """
        self.primaryDataset = primaryDatasetName
        return

    def setProcessedDataset(self, processedDatasetName):
        """
        _setProcessedDataset_

        Set the processed dataset that this workflow will produce.
        """
        self.processedDataset = processedDatasetName
        return

    def setParentProcessedDataset(self, parentProcessedDatasetName):
        """
        _setParentProcessedDataset_

        Set the parent processed dataset for this workflow.
        """
        self.parentProcessedDataset = parentProcessedDatasetName
        return    

    def setAcquisitionEra(self, acquisitionEra):
        """
        _setAcquisitionEra_

        Set the acquisition era.
        """
        self.acquisitionEra = acquisitionEra
        return

    def setProcessingVersion(self, processingVersion):
        """
        _setProcessingVersion_

        Set the processing version.
        """
        self.processingVersion = processingVersion
        return

    def setLazyDownload(self, useLazyDownload):
        """
        _setLazyDownload_

        This enables/disables lazy download mode in the framework.
        """
        self.useLazyDownload = useLazyDownload
    
    def setupOutputModule(self, outputModuleName, dataTier):
        """
        _setupOutputModule_

        Create the outputModule and outputDataset sections of the workflow.
        """
        outputDataset = self.cmsRunNode.addOutputDataset(self.primaryDataset,
                                                         self.processedDataset,
                                                         outputModuleName)
        outputDataset["NoMerge"] = "True"
        outputDataset["DataTier"] = dataTier
        outputDataset["ApplicationName"] = "cmsRun"
        outputDataset["ApplicationProject"] = "CMSSW"
        outputDataset["ApplicationVersion"] = self.cmssw["CMSSWVersion"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["ParentDataset"] = "/%s/%s/%s" % (self.primaryDataset,
                                                        self.parentProcessedDataset,
                                                        "RAW")

        cfgWrapper = self.workflow.payload.cfgInterface
        outputModule = cfgWrapper.getOutputModule(outputModuleName)

        outputModule["catalog"] = '%s-Catalog.xml' % outputModule['Name']
        outputModule["primaryDataset"] = self.primaryDataset
        outputModule["processedDataset"] = self.processedDataset
        outputModule["dataTier"] = dataTier
        outputModule["acquisitionEra"] = self.acquisitionEra
        outputModule["processingVersion"] = self.processingVersion

        outputDataset["LFNBase"] = getLFN(outputModule, self.run, Unmerged = True)
        outputDataset["MergedLFNBase"] = getLFN(outputModule, self.run)
        outputModule["LFNBase"] = outputDataset["LFNBase"]
        outputModule["MergedLFNBase"] = outputDataset["MergedLFNBase"]

        outputModule["fileName"] = "%s.root" % outputModule['Name']

        outputModule["logicalFileName"] = os.path.join(
            outputDataset["LFNBase"], "PromptReco.%s.root" % dataTier)

        return
    
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Generate a workflow.  If the self.configFile parameter has been set
        this will attempt to load the config from file, otherwise it will
        create an empty process object which will get filled in by the runtime
        script.
        """
        self.timestamp = int(time.time())
        self.workflow = WorkflowSpec()
        self.workflowName = "PromptReco-Run%s-%s" % (self.run,
                                                     self.primaryDataset)

        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters["WorkflowType"] = "Processing"
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"
        self.cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        self.cmsRunNode.application["Executable"] = "cmsRun"
        self.cmsRunNode.application["Project"] = "CMSSW"
        self.cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        inputDataset = self.cmsRunNode.addInputDataset(self.primaryDataset,
                                                       self.parentProcessedDataset)
        inputDataset["DataTier"] = "RAW"
        
        if self.configFile == None:
            self.loadProcessFromFramework()            
        else:
            self.loadProcessFromFile()

        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow

    def loadProcessFromFile(self):
        """
        _loadProcessFromFile_

        Load the config file into the workflow.
        """
        preExecScript = self.cmsRunNode.scriptControls["PreExe"]
        preExecScript.append("T0.PromptRecoInjector.RuntimePromptReco")
        
        cfgBaseName = os.path.basename(self.configFile).replace(".py", "")
        cfgDirName = os.path.dirname(self.configFile)
        modPath = imp.find_module(cfgBaseName, [cfgDirName])

        loader = CMSSWAPILoader(self.cmssw["ScramArch"],
                                self.cmssw["CMSSWVersion"],
                                self.cmssw["CMSPath"])
        
        try:
            loader.load()
        except Exception, ex:
            logging.error("Couldn't load CMSSW libraries: %s" % ex)
            return None
        
        try:
            modRef = imp.load_module(cfgBaseName, modPath[0],
                                     modPath[1], modPath[2])
        except Exception, ex:
            logging.error("Can't load config: %s" % ex)
            loader.unload()
            return None
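
A usage sketch for PromptRecoWorkflow along the same lines; it assumes FactoryInterface fills self.cmssw from the version/cmsPath/scramArch arguments, and the run number, paths and dataset names are illustrative:

factory = PromptRecoWorkflow(runNumber=64239, version="CMSSW_2_0_0",
                             cmsPath="/opt/cms", scramArch="slc4_ia32_gcc345")
factory.setPrimaryDataset("MinimumBias")
factory.setParentProcessedDataset("Online")   # parent dataset, RAW tier
factory.setProcessedDataset("PromptReco-v1")
factory.setAcquisitionEra("Run2008")
factory.setProcessingVersion("v1")
spec = factory.makeWorkflow()  # no config file set, so the process is
                               # built from the framework at runtime
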
Code Example #6
class ExpressWorkflow(FactoryInterface):
    """
    _ExpressWorkflow_

    Util to build workflows for express processing jobs
    
    """
    def __init__(self, runNumber, version, globalTag, cmsPath, scramArch, *outModuleInfo):
        FactoryInterface.__init__(self, version, cmsPath, scramArch)
        self.run = runNumber
        self.outputModules = list(outModuleInfo)
        self.globalTag = globalTag


    def buildConfiguration(self, configFile, enableLazyDownload):
        """
        _buildConfiguration_

        Build the output module details from the module info passed to the constructor and create the job configuration

        """
        outputModuleDetails = {}

        for moduleInfo in self.outputModules:

            if "dataset" in moduleInfo:
                moduleName = "write_%s_%s_%s" % (moduleInfo["stream"],
                                                 moduleInfo["dataset"],
                                                 moduleInfo["dataTier"])
            else:
                moduleName = "write_%s_%s" % (moduleInfo["stream"],
                                              moduleInfo["dataTier"])

            outputModuleDetails[moduleName] = {
                "Stream" : moduleInfo["stream"],
                "primaryDataset" : moduleInfo.get("dataset", None),
                "processedDataset" : moduleInfo.get("processedDataset", None),
                "dataTier" : moduleInfo["dataTier"],
                "acquisitionEra" : moduleInfo["acquisitionEra"],
                "processingVersion" : moduleInfo["processingVersion"],
##                "globalTag" : moduleInfo["globalTag"],
                "compressionLevel" : 3
                }

            if "triggerPaths" in moduleInfo:
                selEvents = [ "%s:%s" % (x, moduleInfo["process"])
                              for x in moduleInfo["triggerPaths"] ]
                outputModuleDetails[moduleName]["SelectEvents"] = selEvents
            else:
                outputModuleDetails[moduleName]["SelectEvents"] = None

        cfgInterface = self.createConfiguration(sourceType = "NewEventStreamFileReader",
                                                configFile = configFile,
                                                enableLazyDownload = enableLazyDownload,
                                                outputModuleDetails = outputModuleDetails,
                                                setEventContentInOutput = True,
                                                compressionLevel = 3)

        return cfgInterface


    def makeWorkflowSpec(self, name, configFile, enableLazyDownload):
        """
        _makeWorkflowSpec_

        Create a workflow spec instance

        """
        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflow.setWorkflowName(name)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(int(time.time()))
        self.workflow.parameters["WorkflowType"] = "Repack"
        self.workflow.parameters["RequestLabel"] = name
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        # runtime support for StreamerJobEntity
        self.workflow.addPythonLibrary("T0.DataStructs")

        cmsRunNode = self.workflow.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        cmsRunNode.application["Executable"] = "cmsRun"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        # runtime express script
        cmsRunNode.scriptControls["PreExe"].append(
            "T0.ExpressInjector.RuntimeExpress")
        
        # build the configuration template for the workflow
        cmsRunNode.cfgInterface = self.buildConfiguration(configFile, enableLazyDownload)
        if cmsRunNode.cfgInterface == None:
            return None

        # override global tag
        cmsRunNode.cfgInterface.conditionsTag = self.globalTag

        # generate Dataset information for workflow from cfgInterface
        for outMod, moduleInstance in cmsRunNode.cfgInterface.outputModules.items():
            primaryName = moduleInstance["primaryDataset"] 
            processedName = moduleInstance["processedDataset"] 

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outMod)
            
            outDS["DataTier"] = moduleInstance["dataTier"]
            outDS["ApplicationName"] = cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outMod
            outDS["PhysicsGroup"] = "Tier0"

            # generate just single LFN stub (all output is unmerged)
            # insert them into the output module and dataset info
            outDS["LFNBase"] = self.getLFN(moduleInstance, dataType = 'express', Unmerged = True)
            moduleInstance["LFNBase"] = outDS["LFNBase"]
            moduleInstance["logicalFileName"] = os.path.join(
                outDS["LFNBase"], "%s.root" % outMod
                )

        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")
        
        return self.workflow
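
Finally, a usage sketch for ExpressWorkflow. The module-info dictionary carries the keys buildConfiguration reads (stream, dataTier, acquisitionEra, processingVersion, plus optional dataset, triggerPaths and process); the values, global tag and paths are illustrative, and passing configFile=None assumes createConfiguration can build the process without an external file, as in the PromptReco case:

moduleInfo = {"stream": "Express",
              "dataTier": "FEVT",
              "acquisitionEra": "Run2008",
              "processingVersion": "v1",
              "process": "HLT",
              "triggerPaths": ["HLT_ExpressMuon"]}

factory = ExpressWorkflow(64239, "CMSSW_2_0_0", "CRZT210_V1::All",
                          "/opt/cms", "slc4_ia32_gcc345", moduleInfo)
spec = factory.makeWorkflowSpec("Express-Run64239", None, False)
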
コード例 #10
0
class WorkflowMaker:
    """
    _WorkflowMaker_

    Basic MC workflow maker for PR to use to create workflow spec files.
    
    """
    def __init__(self, requestId, channel, label):
        self.requestId = requestId
        self.group = None
        self.label = label
        self.timestamp = int(time.time())
        self.channel = channel
        self.cmsswVersions = []
        self.configurations = []
        self.psetHashes = {}
        self.origCfgs = {}
        self.acquisitionEra = None
        self.processingString = None
        self.processingVersion = None
        self.conditions = None

        # turn on use of proper naming convention for datasets
        # should be made the default soon, lets deprecate all the old crap
        self.useProperNamingConventions = False
        
        self.options = {}
        self.options.setdefault('FakeHash', False)

        # Should we use another attribute for setting the output dataset
        # status in DBS?
        self.outputDatasetStatus = 'VALID'

        self.inputDataset = {}
        self.inputDataset['IsUsed'] = False
        self.inputDataset['DatasetName'] = None
        self.inputDataset['Primary'] = None
        self.inputDataset['Processed'] = None
        self.inputDataset['DataTier'] = None
        #  //
        # // Extra controls over input dataset if required
        #//
        self.inputDataset['SplitType'] = None
        self.inputDataset['SplitSize'] = None
        self.inputDataset['OnlySites'] = None
        self.inputDataset['OnlyBlocks'] = None
        self.inputDataset['OnlyClosedBlocks'] = True

        #  //
        # // Pileup Dataset controls
        #//
        self.pileupDatasets = []
        
        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflowName = "%s-%s-%s" % (label, channel, requestId)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("mc")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters['RequestLabel'] = self.label
        self.workflow.parameters['ProdRequestID'] = self.requestId

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"
        
        self.cmsRunNodes = [self.cmsRunNode]
        self.saveOutputFor = []


    def chainCmsRunNode(self, stageOutIntermediates = False, *outputModules):
        """
        append a cmsRun config to the current cmsRun node and chain them
        """
        if stageOutIntermediates: #Do we want to keep cmsRunNode's products?
            self.saveOutputFor.append(self.cmsRunNode.name)    
        newnode = self.cmsRunNode.newNode("cmsRun%s" % 
                                          (len(self.cmsRunNodes) + 1))
        newnode.type = "CMSSW"
        if not outputModules:
            outputModules = self.configurations[-1].outputModules.keys()
        for outmodule in outputModules:
            newnode.addInputLink(self.cmsRunNode.name, outmodule,
                        'source', AppearStandalone = not stageOutIntermediates)
        self.cmsRunNode = newnode
        self.cmsRunNodes.append(newnode)


    def changeCategory(self, newCategory):
        """
        _changeCategory_

        Change the workflow category from the default mc
        that appears in the LFNs

        """
        self.workflow.setRequestCategory(newCategory)
        return

    def setAcquisitionEra(self,era):
        """
        _setAcquisitionEra_
        
        Sets the AcquisitionEra in the workflow 

        """
        self.workflow.setAcquisitionEra(era)
        self.acquisitionEra=era
        return


    def setNamingConventionParameters(self, era, procString, procVers):
        """
        _setNamingConventionParameters_

        Sets AcquisitionEra, ProcessingString and ProcessingVersion

        """
        self.workflow.setAcquisitionEra(era)
        self.workflow.parameters['ProcessingString'] = procString
        self.workflow.parameters['ProcessingVersion'] = procVers
        
        self.acquisitionEra=era
        self.processingString = procString
        self.processingVersion = procVers

        self.useProperNamingConventions = True

        return

    
    def setActivity(self, activity):
        """
        _changeWorkflowType_
        
        Set the workflow type
        i.e. Simulation, Reconstruction, Reprocessing, Skimming
        """
        self.workflow.setActivity(activity)
        return
    

    def setCMSSWVersion(self, version):
        """
        _setCMSSWVersion_

        Set the version of CMSSW to be used

        """
        self.cmsswVersions.append(version)
        self.cmsRunNode.application['Version'] = version
        self.cmsRunNode.application['Executable'] = "cmsRun"
        self.cmsRunNode.application['Project'] = "CMSSW"
        self.cmsRunNode.application['Architecture'] = ""
        return


    def setUserSandbox(self,sandboxloc):
        """
        _setSandbox_
        Sets the location of the user sandbox

        """
        self.cmsRunNode.userSandbox=sandboxloc
        return
    
    
    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Physics Group owning the workflow

        """
        self.group = group
        self.workflow.parameters['PhysicsGroup'] = self.group
        return

    
    def setConfiguration(self, cfgFile, **args):
        """
        _setConfiguration_

        Provide the CMSSW configuration to be used.
        By default, assume that cfgFile is a python format string.

        The format & type can be specified using args:

        - Type   : must be "file" or "string" or "instance"
        
        """
        cfgType = args.get("Type", "instance")
        
        
        if cfgType not in ("file", "string", "instance"):
            msg = "Illegal Type for cfg file: %s\n" % cfgType
            msg += "Should be \"file\" or \"string\"\n"
            raise RuntimeError, msg

        cfgContent = cfgFile
        if cfgType == "file":
            cfgContent = file(cfgFile).read()
            cfgType = "string"
            
        if cfgType == "string":
            cfgData = cfgContent
            cfgContent = CMSSWConfig()
            cfgContent.unpack(cfgData)
        
                
        self.cmsRunNode.cfgInterface = cfgContent
        self.configurations.append(cfgContent)
        return


    def setOriginalCfg(self, honkingGreatString):
        """
        _setOriginalCfg_

        Set the original cfg file content that is to be recorded in DBS

        CALL THIS METHOD AFTER setConfiguration
        
        """
        sep = '\n\n### Next chained config file ###\n\n'
        cfg = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_config = self.origCfgs.get(link['InputNode'], '')
                if prev_config:
                    cfg = '%s%s%s' % (cfg, prev_config, sep)
        cfg = '%s%s' % (cfg, honkingGreatString)
        self.cmsRunNode.cfgInterface.originalCfg = cfg
        self.origCfgs[self.cmsRunNode.name] = cfg
        return
        
    def setPSetHash(self, hashValue):
        """
        _setPSetHash_

        Set the value for the PSetHash
        
        If any InputLinks are present their pset hashes are prepended

        """
        hash = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_node_hash = self.psetHashes.get(link['InputNode'], None)
                if prev_node_hash:  # cmsGen nodes will be missing
                    hash = '%s%s_' % (hash, prev_node_hash)
        hash = '%s%s' % (hash, hashValue)
        self.psetHashes[self.cmsRunNode.name] = hash                           
        return
        

    
    def addInputDataset(self, datasetPath):
        """
        _addInputDataset_

        If this workflow processes a dataset, set that here

        NOTE: Is possible to also specify
            - Split Type (file or event)
            - Split Size (int)
            - input DBS
        Not sure how many of these we want to use.
        For now, they can be added to the inputDataset dictionary
        """
        datasetBits = DatasetConventions.parseDatasetPath(datasetPath)
        self.inputDataset.update(datasetBits)
        self.inputDataset['IsUsed'] = True
        self.inputDataset['DatasetName'] = datasetPath
        
        return
        

    def addPileupDataset(self, datasetName, filesPerJob = 10,
            targetModule=None):
        """
        _addPileupDataset_

        Add a dataset to provide pileup overlap.
        filesPerJob should be 1 in 99.9 % of cases

        """
        pileupDataset = {}
        pileupDataset['Primary'] = None
        pileupDataset['Processed'] = None
        pileupDataset['DataTier'] = None
        datasetBits = DatasetConventions.parseDatasetPath(datasetName)
        pileupDataset.update(datasetBits)
        pileupDataset['FilesPerJob'] = filesPerJob
        # Target module coould be 'MixingModule' or 'DataMixingModule' for
        # the moment. If None, MixingModule will be used.
        pileupDataset['TargetModule'] = targetModule
        self.pileupDatasets.append(pileupDataset)
        return

    def addFinalDestination(self, *phedexNodeNames):
        """
        _addFinalDestination_

        Add a final destination that can be used to generate
        a PhEDEx subscription so that the data gets transferred to
        some final location.

        NOTE: Do we want to support a list of PhEDEx nodes? Eg CERN + FNAL

        """
        nameList = ""
        for nodeName in phedexNodeNames:
            nameList += "%s," % nodeName
        nameList = nameList[:-1]
        self.workflow.parameters['PhEDExDestination'] = nameList
        return
    
    def addSelectionEfficiency(self, selectionEff):
        """
        _addSelectionEfficiency_

        Do we have a selection efficiency?

        """
        
        self.cmsRunNode.applicationControls["SelectionEfficiency"] = \
                                                             selectionEff
        return

    def setOutputDatasetDbsStatus(self, status):
        """
        _setOutputDatasetDbsStatus_

        The output datasets will have this status in the field dataset.status.
        This value will be use when registering the output dataset in DBS.

        Only two values are acepted:
            - VALID
            - PRODUCTION

        """
        
        if status in ('VALID', 'PRODUCTION'):
            self.outputDatasetStatus = status

        return

    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()
        
        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode,
                        "stageOut1", *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'],
                self.inputDataset['Processed']
                )
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                'SplitType',
                'SplitSize',
                'OnlySites',
                'OnlyBlocks',
                'OnlyClosedBlocks',
                ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = self.inputDataset[keyname]
                    
            
        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'],
                pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']
            
        
        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):
            
            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                                            cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                                            self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down the
                        # dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Wont work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use it's parent.
                        else:
                            # TODO: Wont work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue
            
            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                                        PhysicsChannel = self.channel,
                                        )

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join((self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra = self.acquisitionEra,
                        ProcessingString = processingString,
                        ProcessingVersion = self.processingVersion,
                        Unmerged = True
                        )
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version = cmsRunNode.application['Version'],
                        Label = self.label,
                        Group = self.group,
                        FilterName = filterName,
                        RequestId = self.requestId,
                        Unmerged = True
                        )
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra = self.acquisitionEra,
                        Conditions = self.workflow.parameters['Conditions'],
                        ProcessingVersion = self.workflow.parameters['ProcessingVersion'],
                        FilterName = filterName,
                        Unmerged = True
                        )
                  
                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName
    
                outDS = cmsRunNode.addOutputDataset(primaryName, 
                                                         processedName,
                                                         outModName)

                outDS['Status'] = self.outputDatasetStatus                
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                                         cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group
    
                # check for input dataset for first node
                if self.inputDataset['IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            outDS['ParentDataset'] = datasets['%s:%s' % (inputLink['InputNode'],
                                                                    inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                    datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                            (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                                "/%s/%s/%s" % ( outDS['PrimaryDataset'],
                                                  outDS['ProcessedDataset'],
                                                  outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (i.e. HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow



    def _Validate(self):
        """
        _Validate_

        Private method to check that all required options are set.

        Raises a WorkflowMakerError if any problems are found.

        """
        notNoneAttrs = [
            "requestId",
            "label",
            "group",
            "channel",
            ]
        for attrName in notNoneAttrs:
            value = getattr(self, attrName, None)
            if value == None:
                msg = "Attribute Not Set: %s" % attrName
                raise WorkflowMakerError(msg)
        
        if not len(self.configurations):
            msg = "Attribute Not Set: configurations"
            raise WorkflowMakerError(msg)
            
        if len(self.configurations) != len(self.cmsswVersions):
            msg = "len(self.configurations) != len(self.cmsswVersions)"
            raise WorkflowMakerError(msg)

        return
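
A note on the dataset bookkeeping in makeWorkflow above: staged-out output is recorded in the datasets dict under a "nodeName:outputModuleName" key, with the usual "/Primary/Processed/Tier" path as the value, and downstream nodes resolve their ParentDataset through that key. A minimal sketch of the convention; all node, module, and dataset names below are hypothetical:

# Minimal sketch of the dataset bookkeeping convention used in
# makeWorkflow; all names and dataset fields here are hypothetical.
datasets = {}

# record the output of a node, keyed by "nodeName:outputModuleName"
datasets['cmsRun1:output'] = "/%s/%s/%s" % (
    "PrimaryDS", "CMSSW_2_0_12-Test07-v1-unmerged", "GEN-SIM")

# a downstream node linked via InputNode="cmsRun1" and
# OutputModule="output" resolves its parent dataset as:
parentDataset = datasets['cmsRun1:output']
print parentDataset
# -> /PrimaryDS/CMSSW_2_0_12-Test07-v1-unmerged/GEN-SIM
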
Code example #11
if not os.path.exists(cfgFile):
    msg = "Cfg File Not Found: %s" % cfgFile
    raise RuntimeError, msg


#
# create workflow
#

workflowName = "Tier0MCFeeder-%d" % int(time.time())
scramArch = "slc4_ia32_gcc345"
cmsPath = "/afs/cern.ch/cms/sw"

workflow = WorkflowSpec()
workflow.setWorkflowName(workflowName)
workflow.setRequestCategory("mc")
workflow.setRequestTimestamp(int(time.time()))
workflow.parameters["WorkflowType"] = "Processing"
workflow.parameters["CMSSWVersion"] = version
workflow.parameters["ScramArch"] = scramArch
workflow.parameters["CMSPath"] = cmsPath

# needed for streamed index stageout
workflow.parameters['StreamerIndexDir'] = indexdir

cmsRunNode = workflow.payload
cmsRunNode.name = "cmsRun1"
cmsRunNode.type = "CMSSW"
cmsRunNode.application["Version"] = version
cmsRunNode.application["Executable"] = "cmsRun"
cmsRunNode.application["Project"] = "CMSSW"
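
This feeder snippet starts mid-script: the imports are not shown, and cfgFile, version, and indexdir must already be defined (presumably parsed from the command line) before the os.path.exists check. A hypothetical preamble that would make the fragment self-contained; the paths and release name are placeholders, and the WorkflowSpec import assumes the ProdCommon package layout:

# Hypothetical preamble for the feeder snippet above; values are
# placeholders, not taken from the original script.
import os
import time

from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

cfgFile = "/path/to/feeder-config.py"   # placeholder cfg file path
version = "CMSSW_2_0_12"                # placeholder CMSSW release
indexdir = "/data/streamer-index"       # placeholder index stageout dir
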
Code example #12
class MergePackWorkflow(FactoryInterface):
    """
    _MergePackWorkflow_

    Util to build workflows for mergepack jobs
    """
    def __init__(self, runNumber, version, cmsPath, scramArch, *outModuleInfo):
        FactoryInterface.__init__(self, version, cmsPath, scramArch)
        self.run = runNumber
        self.outputModules = list(outModuleInfo)


    def buildConfiguration(self, enableLazyDownload, configFile):
        """
        _buildConfiguration_

        Build the mergepack configuration from the passed-in output module info.

        """
        outputModuleDetails = {}

        for moduleInfo in self.outputModules:

            moduleName = "write_%s_%s_%s" % (moduleInfo["stream"],
                                             moduleInfo["dataset"],
                                             moduleInfo["dataTier"])

            outputModuleDetails[moduleName] = {
                "Stream" : moduleInfo["stream"],
                "primaryDataset" : moduleInfo.get("dataset", None),
                "processedDataset" : moduleInfo.get("processedDataset", None),
                "dataTier" : moduleInfo["dataTier"],
                "acquisitionEra" : moduleInfo["acquisitionEra"],
                "processingVersion" : moduleInfo["processingVersion"],
                }

            if "triggerPaths" in moduleInfo:
                selEvents = [ "%s:%s" % (x, moduleInfo["process"])
                              for x in moduleInfo["triggerPaths"] ]
                outputModuleDetails[moduleName]["SelectEvents"] = selEvents
                outputModuleDetails[moduleName]["compressionLevel"] = 3

        if configFile == None:

            cfgInterface = self.createConfiguration(sourceType = "PoolSource",
                                                    processName = "MERGEPACKER",
                                                    configName = "mergepacker-config",
                                                    enableLazyDownload = enableLazyDownload,
                                                    outputModuleDetails = outputModuleDetails,
                                                    noEventSort = True)

        else:

            cfgInterface = self.createConfiguration(sourceType = "PoolSource",
                                                    configFile = configFile,
                                                    enableLazyDownload = enableLazyDownload,
                                                    outputModuleTemplate = outputModuleDetails.values()[0],
                                                    noEventSort = True)
                
        return cfgInterface

    
    def makeWorkflowSpec(self, name, enableLazyDownload, configFile = None):
        """
        _makeWorkflowSpec_

        Create a workflow spec instance
        
        """
        self.workflow = WorkflowSpec()
        self.workflow.setWorkflowName(name)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(int(time.time()))
        self.workflow.parameters["WorkflowType"] = "Repack"
        self.workflow.parameters["RequestLabel"] = name
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        cmsRunNode = self.workflow.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        cmsRunNode.application["Executable"] = "cmsRun"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        # runtime express merge script
        cmsRunNode.scriptControls["PreExe"].append(
            "T0.ExpressMerger.RuntimeExpressMerger"
            )

        # build the configuration template for the workflow
        cmsRunNode.cfgInterface = self.buildConfiguration(enableLazyDownload, configFile)
        if cmsRunNode.cfgInterface == None:
            return None

        # generate Dataset information for workflow from cfgInterface
        for outMod,moduleInstance in cmsRunNode.cfgInterface.outputModules.items():
            primaryName = moduleInstance["primaryDataset"] 
            processedName = moduleInstance["processedDataset"] 

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outMod)
            
            outDS["DataTier"] = moduleInstance["dataTier"]
            outDS["ApplicationName"] = cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outMod
            outDS["PhysicsGroup"] = "Tier0"
            outDS["ApplicationFamily"] = outMod

            # generate just a single LFN stub (all output is merged)
            # and insert it into the output module and dataset info
            outDS["LFNBase"] = self.getLFN(moduleInstance, dataType = "express")
            moduleInstance["LFNBase"] = outDS["LFNBase"]
            moduleInstance["logicalFileName"] = os.path.join(
                outDS["LFNBase"], "%s.root" % outMod
                )

        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        # override stageout
        #
        # FIXME: This hardcodes the TFC LFN prefix !!!
##        if svcClass != None:
##            finder = NodeFinder("stageOut1")
##            self.workflow.payload.operate(finder)
##            node = finder.result

##            WorkflowTools.addStageOutOverride(node,
##                                              "rfcp",
##                                              "",
##                                              "srm-cms.cern.ch",
##                                              "rfio:///castor?svcClass=%s&path=/castor/cern.ch/cms" % svcClass)

        return self.workflow
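
A hedged usage sketch for MergePackWorkflow: the per-module dict fields are inferred from buildConfiguration above, the constructor arguments from __init__, and the run number, release, and spec.save() persistence call are placeholder assumptions, not values from the original source:

# Hypothetical usage of MergePackWorkflow; all values are placeholders
# and spec.save() assumes the WorkflowSpec persistence API.
moduleInfo = {
    "stream" : "Express",
    "dataset" : "StreamExpress",
    "processedDataset" : "Run123456-Express-v1",
    "dataTier" : "FEVT",
    "acquisitionEra" : "Run123456",
    "processingVersion" : "v1",
    }

factory = MergePackWorkflow(123456,                 # run number
                            "CMSSW_3_8_1",          # CMSSW release
                            "/afs/cern.ch/cms/sw",  # CMS path
                            "slc5_ia32_gcc434",     # scram arch
                            moduleInfo)

spec = factory.makeWorkflowSpec("MergePack-Run123456",
                                enableLazyDownload = False)
if spec is not None:
    spec.save("MergePack-Run123456-workflow.xml")
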
Code example #13
class RepackWorkflow(FactoryInterface):
    """
    _RepackWorkflow_

    Util to build workflows for accumulator or merge repacker jobs
    
    """
    def __init__(self, runNumber, version, cmsPath, scramArch, *outModuleInfo):
        FactoryInterface.__init__(self, version, cmsPath, scramArch)
        self.run = runNumber
        self.outputModules = list(outModuleInfo)


    def buildConfiguration(self, enableLazyDownload):
        """
        _buildConfiguration_

        Using a RepackerConfigMaker instance, generate a template
        config file

        """
        outputModuleDetails = {}

        for moduleInfo in self.outputModules:

            moduleName = "write_%s_%s_%s" % (moduleInfo["stream"],
                                             moduleInfo["dataset"],
                                             moduleInfo["dataTier"])

            outputModuleDetails[moduleName] = {
                "Stream" : moduleInfo["stream"],
                "algorithm" : None,
                "primaryDataset" : moduleInfo.get("dataset", None),
                "processedDataset" : moduleInfo.get("processedDataset", None),
                "dataTier" : moduleInfo["dataTier"],
                "filterName" : None,
                "acquisitionEra" : moduleInfo["acquisitionEra"],
                "processingVersion" : moduleInfo["processingVersion"],
                "globalTag" : moduleInfo["globalTag"],
                "LFNBase" : None,
                "MergedLFNBase" : None,
                "compressionLevel" : 6
                }

            if "triggerPaths" in moduleInfo:
                selEvents = [ "%s:%s" % (x, moduleInfo["process"])
                              for x in moduleInfo["triggerPaths"] ]
                outputModuleDetails[moduleName]["SelectEvents"] = selEvents
            else:
                outputModuleDetails[moduleName]["SelectEvents"] = None

        cfgInterface = self.createConfiguration(sourceType = "NewEventStreamFileReader",
                                                processName = "REPACKER",
                                                configName = "repack-config",
                                                enableLazyDownload = enableLazyDownload,
                                                outputModuleDetails = outputModuleDetails)

        return cfgInterface


    def makeWorkflow(self, name, enableLazyDownload):
        """
        _makeWorkflow_

        Create a workflow spec instance for the run provided

        """
        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflow.setWorkflowName(name)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(int(time.time()))
        self.workflow.parameters["WorkflowType"] = "Repack"
        self.workflow.parameters["RequestLabel"] = name
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        # runtime support for StreamerJobEntity
        self.workflow.addPythonLibrary("T0.DataStructs")

        cmsRunNode = self.workflow.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        cmsRunNode.application["Executable"] = "cmsRun"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        # runtime repacker script
        cmsRunNode.scriptControls["PreExe"].append(
            "T0.RepackerInjector.RuntimeRepacker")
        
        # build the configuration template for the workflow
        cmsRunNode.cfgInterface = self.buildConfiguration(enableLazyDownload)
        if cmsRunNode.cfgInterface == None:
            return None

        # generate Dataset information for workflow from cfgInterface
        for outMod in cmsRunNode.cfgInterface.outputModules.keys():
            moduleInstance = cmsRunNode.cfgInterface.getOutputModule(outMod)
            primaryName = moduleInstance["primaryDataset"] 
            processedName = moduleInstance["processedDataset"] 

            outDS = cmsRunNode.addOutputDataset(primaryName, 
                                                processedName,
                                                outMod)
            
            outDS["DataTier"] = moduleInstance["dataTier"]
            outDS["ApplicationName"] = cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outMod
            outDS["PhysicsGroup"] = "Tier0"
            outDS["ApplicationFamily"] = outMod

            # generate merged and unmerged LFN stubs
            # insert them into the output module and dataset info
            outDS["LFNBase"] = self.getLFN(moduleInstance, Unmerged = True)
            outDS["MergedLFNBase"] = self.getLFN(moduleInstance)
            moduleInstance["LFNBase"] = outDS["LFNBase"]
            moduleInstance["MergedLFNBase"] = outDS["MergedLFNBase"]
            moduleInstance["logicalFileName"] = os.path.join(
                outDS["LFNBase"], "%s.root" % outMod)

        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")
        
        return self.workflow
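
The trigger-path handling in buildConfiguration expands each path into a "path:processName" SelectEvents entry. A minimal sketch of that expansion; the path and process names below are hypothetical:

# Minimal sketch of the SelectEvents expansion done in
# buildConfiguration above; names are hypothetical.
moduleInfo = {
    "process" : "HLT",
    "triggerPaths" : ["HLT_Mu9", "HLT_Ele15_SW_L1R"],
    }

selEvents = [ "%s:%s" % (x, moduleInfo["process"])
              for x in moduleInfo["triggerPaths"] ]
print selEvents
# -> ['HLT_Mu9:HLT', 'HLT_Ele15_SW_L1R:HLT']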