def makeWorkflow(self):
        """
        _makeWorkflow_

        Generate a workflow.  If the self.configFile parameter has been set,
        this will attempt to load the config from that file; otherwise it
        will create an empty process object to be filled in by the runtime
        script.
        """
        self.timestamp = int(time.time())
        self.workflow = WorkflowSpec()
        self.workflowName = "AlcaSkim-Run%s-%s" % \
                            (self.run, self.primaryDataset)

        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters["WorkflowType"] = "Processing"
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"
        self.cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        self.cmsRunNode.application["Executable"] = "cmsRun"
        self.cmsRunNode.application["Project"] = "CMSSW"
        self.cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        inputDataset = self.cmsRunNode.addInputDataset(self.primaryDataset,
                                                       self.parentProcessedDataset)
        inputDataset["DataTier"] = "RECO"
        
        if self.configFile == None:
            self.loadProcessFromFramework()            
        else:
            self.loadProcessFromFile()
            
        self.setupOutputModules()

        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow
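
A minimal driver sketch for the maker above (the class name and attribute
values are assumptions for illustration; only makeWorkflow() and the
attributes it reads come from the example, and save() appears on
WorkflowSpec in a later example):

    maker = AlcaSkimWorkflowMaker()            # hypothetical class name
    maker.run = 123456
    maker.primaryDataset = "MinimumBias"
    maker.parentProcessedDataset = "Run123456-RECO"
    maker.cmssw = {"CMSSWVersion": "CMSSW_3_2_4",
                   "ScramArch": "slc5_ia32_gcc434",
                   "CMSPath": "/uscmst1/prod/sw/cms"}
    maker.configFile = None                    # None -> loadProcessFromFramework()
    spec = maker.makeWorkflow()
    spec.save("%s-Workflow.xml" % maker.workflowName)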
    cfgWrapper = CMSSWConfig()
    #cfgWrapper.originalCfg = file(cmsRunCfg).read()
    cfgInt = cfgWrapper.loadConfiguration(cmsCfg)
    cfgInt.validateForProduction()

    if nodeNumber:
        try:
            inputModules = chainedInputs[nodeNumber-1]
        except IndexError:
            inputModules = []
        maker.chainCmsRunNode(stageoutOutputs[nodeNumber-1], *inputModules)

    maker.setConfiguration(cfgWrapper, Type = "instance")
    maker.setCMSSWVersion(versions[nodeNumber])
    maker.setOriginalCfg(file(cmsRunCfg).read())
    maker.setPSetHash(WorkflowTools.createPSetHash(cmsRunCfg))

    nodeNumber += 1
    
maker.changeCategory(category)
maker.setNamingConventionParameters(acquisitionEra, processingString, processingVersion)
maker.workflow.parameters['Conditions'] = conditions
maker.setOutputDatasetDbsStatus(dbsStatus)
    

if selectionEfficiency != None:
    maker.addSelectionEfficiency(selectionEfficiency)
    maker.addCmsGenSelectionEfficiency(selectionEfficiency)


#  //
# // Pileup sample?
#//
if pileupDS != None:
    maker.addPileupDataset(pileupDS, pileupFilesPerJob)

#  //
# // DataMix pileup sample?
#//
if dataMixDS:
     maker.addPileupDataset(dataMixDS, 1, 'DataMixingModule')
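
For reference, the two pileup attachment paths above differ only in the
files-per-job count and the target mixing module. A hedged sketch (the
dataset paths are invented; the addPileupDataset signature is taken from the
calls in this script):

    # Ordinary pileup: mix several minbias files into each job.
    maker.addPileupDataset("/MinBias/Example-v1/GEN-SIM-RAW", 3)
    # Data mixing: one file per job, routed to the DataMixingModule.
    maker.addPileupDataset("/ExampleData/Run2009A-v1/RAW", 1, 'DataMixingModule')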
Example 4
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()

        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1",
                                      *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'], self.inputDataset['Processed'])
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                    'SplitType',
                    'SplitSize',
                    'OnlySites',
                    'OnlyBlocks',
                    'OnlyClosedBlocks',
            ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = self.inputDataset[
                        keyname]

        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'], pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']

        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                                            cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                                            self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down the
                        # dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use its parent.
                        else:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue

            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                    PhysicsChannel=self.channel)

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join(
                            (self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        ProcessingString=processingString,
                        ProcessingVersion=self.processingVersion,
                        Unmerged=True)
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version=cmsRunNode.application['Version'],
                        Label=self.label,
                        Group=self.group,
                        FilterName=filterName,
                        RequestId=self.requestId,
                        Unmerged=True)
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        Conditions=self.workflow.parameters['Conditions'],
                        ProcessingVersion=self.workflow.parameters['ProcessingVersion'],
                        FilterName=filterName,
                        Unmerged=True)

                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName

                outDS = cmsRunNode.addOutputDataset(primaryName, processedName,
                                                    outModName)

                outDS['Status'] = self.outputDatasetStatus
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                                         cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group

                # check for input dataset for first node
                if self.inputDataset['IsUsed'] and \
                        cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            outDS['ParentDataset'] = datasets[
                                '%s:%s' % (inputLink['InputNode'],
                                           inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                    datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                            (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                                "/%s/%s/%s" % ( outDS['PrimaryDataset'],
                                                  outDS['ProcessedDataset'],
                                                  outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (e.g. HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow
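
The datasets bookkeeping in the example above keys every produced dataset by
"nodeName:outputModuleName" and stores its full dataset path; later nodes
resolve ParentDataset through that map (or through datasetsToForward when an
intermediate node does not stage out). A small illustration with invented
names:

    datasets = {}
    # registered when cmsRun1's output module "outputRECO" is processed:
    datasets['cmsRun1:outputRECO'] = "/%s/%s/%s" % (
        "ExamplePrimary", "CMSSW_3_2_4-v1-unmerged", "RECO")
    # a downstream node with an InputLink back to cmsRun1 looks up its parent:
    parent = datasets['%s:%s' % ('cmsRun1', 'outputRECO')]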
Example 5
def createMergeJobWorkflow(procSpec, isFastMerge = True, doCleanUp = True, littleE = False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job
    workflows that can be used to generate actual merge jobs 
    (as opposed to creating datasets like createMergeDatasetWorkflow)

    Returns a dictionary mapping input (i.e. MergeSensor-watched) dataset
    names to workflow spec instances.

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()
    

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"
        

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        #  //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox != None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        #  //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        #  //
        # // Output Dataset
        #//
        
        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'], 
            dataset['ProcessedDataset'], 
            outputModuleName)

        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']

        outputDataset["ApplicationName"] = \
                    cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = \
                    cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = \
                    cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
                      procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset
                
        
        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp == True:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        #  //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        
        results[inputDataset] = newWF

    return results
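
A usage sketch (loading the processing spec from XML is an assumption; the
returned dictionary maps each MergeSensor-watched dataset to its merge spec):

    procSpec = WorkflowSpec()
    procSpec.load("Processing-Workflow.xml")   # assumed persisted spec
    mergeSpecs = createMergeJobWorkflow(procSpec, isFastMerge=False)
    for watchedDataset, mergeSpec in mergeSpecs.items():
        mergeSpec.save("Merge%s-Workflow.xml" % watchedDataset.replace("/", "_"))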
    def makeWorkflowSpec(self, name, configFile, enableLazyDownload):
        """
        _makeWorkflowSpec_

        Create a workflow spec instance

        """
        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflow.setWorkflowName(name)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(int(time.time()))
        self.workflow.parameters["WorkflowType"] = "Repack"
        self.workflow.parameters["RequestLabel"] = name
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        # runtime support for StreamerJobEntity
        self.workflow.addPythonLibrary("T0.DataStructs")

        cmsRunNode = self.workflow.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        cmsRunNode.application["Executable"] = "cmsRun"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        # runtime express script
        cmsRunNode.scriptControls["PreExe"].append(
            "T0.ExpressInjector.RuntimeExpress")
        
        # build the configuration template for the workflow
        cmsRunNode.cfgInterface = self.buildConfiguration(configFile, enableLazyDownload)
        if cmsRunNode.cfgInterface == None:
            return None

        # override global tag
        cmsRunNode.cfgInterface.conditionsTag = self.globalTag

        # generate Dataset information for workflow from cfgInterface
        for outMod,moduleInstance in cmsRunNode.cfgInterface.outputModules.items():
            primaryName = moduleInstance["primaryDataset"] 
            processedName = moduleInstance["processedDataset"] 

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outMod)
            
            outDS["DataTier"] = moduleInstance["dataTier"]
            outDS["ApplicationName"] = cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outMod
            outDS["PhysicsGroup"] = "Tier0"
            outDS["ApplicationFamily"] = outMod

            # generate just single LFN stub (all output is unmerged)
            # insert them into the output module and dataset info
            outDS["LFNBase"] = self.getLFN(moduleInstance, dataType = 'express', Unmerged = True)
            moduleInstance["LFNBase"] = outDS["LFNBase"]
            moduleInstance["logicalFileName"] = os.path.join(
                outDS["LFNBase"], "%s.root" % outMod
                )

        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")
        
        return self.workflow
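
A hedged driver sketch for the express maker above (the class name is an
assumption; makeWorkflowSpec() returns None when the configuration cannot be
built, so the result is checked before saving):

    maker = ExpressWorkflowMaker()             # hypothetical class name
    spec = maker.makeWorkflowSpec("Express-Run123456", "express.py",
                                  enableLazyDownload=True)
    if spec is None:
        raise RuntimeError("could not build express configuration")
    spec.save("Express-Run123456-Workflow.xml")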
try:
    loader.load()
except Exception, ex:
    msg = "Couldn't load CMSSW libraries: %s" % ex
    raise RuntimeError, msg

loadedModule = imp.load_source(os.path.basename(cfgFile).replace(".py", ""), cfgFile)

cmsRunNode.cfgInterface = CMSSWConfig()
loadedConfig = cmsRunNode.cfgInterface.loadConfiguration(loadedModule.process)
loadedConfig.validateForProduction()

loader.unload()

# generate Dataset information for workflow from cfgInterface
for moduleName,outMod in cmsRunNode.cfgInterface.outputModules.items():

    outMod["LFNBase"] = lfnbase
    outMod["logicalFileName"] = os.path.join(
        lfnbase, "%s.root" % moduleName
        )

WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")

workflow.save("%s-Workflow.xml" % workflowName)

print "Created: %s-Workflow.xml" % workflowName
print "From: %s " % cfgFile

Example 8
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()
        
        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode,
                        "stageOut1", *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'],
                self.inputDataset['Processed']
                )
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                'SplitType',
                'SplitSize',
                'OnlySites',
                'OnlyBlocks',
                'OnlyClosedBlocks',
                ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = self.inputDataset[keyname]
                    
            
        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'],
                pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']
            
        
        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):
            
            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                                            cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                                            self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down the
                        # dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use its parent.
                        else:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue
            
            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                                        PhysicsChannel = self.channel,
                                        )

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join((self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra = self.acquisitionEra,
                        ProcessingString = processingString,
                        ProcessingVersion = self.processingVersion,
                        Unmerged = True
                        )
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version = cmsRunNode.application['Version'],
                        Label = self.label,
                        Group = self.group,
                        FilterName = filterName,
                        RequestId = self.requestId,
                        Unmerged = True
                        )
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra = self.acquisitionEra,
                        Conditions = self.workflow.parameters['Conditions'],
                        ProcessingVersion = self.workflow.parameters['ProcessingVersion'],
                        FilterName = filterName,
                        Unmerged = True
                        )
                  
                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName
    
                outDS = cmsRunNode.addOutputDataset(primaryName, 
                                                         processedName,
                                                         outModName)

                outDS['Status'] = self.outputDatasetStatus                
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                                         cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group
    
                # check for input dataset for first node
                if self.inputDataset['IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            outDS['ParentDataset'] = datasets['%s:%s' % (inputLink['InputNode'],
                                                                    inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                    datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                            (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                                "/%s/%s/%s" % ( outDS['PrimaryDataset'],
                                                  outDS['ProcessedDataset'],
                                                  outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (e.g. HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow
Example 9
    RealPSetHash = None
    if not noRecreate:
        cfgFileContent = file(cfgFile).read()
        replacer = re.compile(r"\}\s*$")
        psetHackString = "\n  PSet psetHack = { string relval = \"%s\"  }\n}\n" % (
            prodName,
            )
        cfgFileContent = replacer.sub(psetHackString, cfgFileContent)
        handle = open(cfgFile, 'w')
        handle.write(cfgFileContent)
        handle.close()    
        #  //
        # // Generate PSet Hash
        #//
        print "generate PSet Hash for %s"%cfgFile
        RealPSetHash = WorkflowTools.createPSetHash(cfgFile)

        ## AF: need to recreate cfg parser since it's not stored in pycfg
        #  //
        # // cfg python parser from CMSSW
        #//
        print "CMSSW python parser on %s \n ....it can take a while..."%cfgFile
        from FWCore.ParameterSet.Config import include
        from FWCore.ParameterSet.parsecf.pyparsing import *
        try:
          cmsCfg = include(cfgFile)
        except ParseException, ex:
          print "Error in CMSSW python parser: ParseException \n %s \n"%ex
          continue
        except ParseFatalException, ex:
          print "Error in CMSSW python parser: ParseFatalException \n %s \n"%ex
Example 10
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()

        #  //
        # // Input Dataset required for Tier0
        #//

        inputDataset = self.cmsRunNode.addInputDataset(
            self.inputDataset['Primary'], self.inputDataset['Processed'])
        inputDataset["DataTier"] = self.inputDataset['DataTier']
        for keyname in [
                'SplitType',
                'SplitSize',
                'OnlySites',
                'OnlyBlocks',
                'OnlyClosedBlocks',
        ]:
            if self.inputDataset[keyname] != None:
                self.workflow.parameters[keyname] = self.inputDataset[keyname]

        #  //
        # // Extract dataset info from cfg
        #//
        for outModName in self.configuration.outputModules.keys():
            moduleInstance = self.configuration.getOutputModule(outModName)
            #  //
            # // Data Tier same as input
            #//
            dataTier = self.inputDataset['DataTier']
            #  //
            # // Output primary dataset same as input primary
            #//
            primaryName = self.inputDataset['Primary']

            #  //
            # // Output processed dataset
            #//  (Note: we pass more info than is used here, since naming
            #  // conventions have a tendency to change in CMS)
            processedName = DatasetConventions.tier0ProcessedDatasetName(
                Version=self.cmsswVersion,
                InputPrimaryDataset=self.inputDataset['Primary'],
                InputProcessedDataset=self.inputDataset['Processed'],
                Label=self.label,
                Group=self.group,
                RequestId=self.requestId,
                Unmerged=self.unmergedDataset)

            dataTier = DatasetConventions.checkDataTier(dataTier)

            moduleInstance['primaryDataset'] = primaryName
            moduleInstance['processedDataset'] = processedName

            outDS = self.cmsRunNode.addOutputDataset(primaryName,
                                                     processedName, outModName)

            outDS['DataTier'] = dataTier
            outDS["ApplicationName"] = \
                                     self.cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outModName
            outDS["PhysicsGroup"] = self.group
            outDS["ApplicationFamily"] = outModName

            if self.inputDataset['IsUsed']:
                outDS['ParentDataset'] = self.inputDataset['DatasetName']

            if self.options['FakeHash']:
                guid = makeUUID()
                outDS['PSetHash'] = "hash=%s;guid=%s" % (self.psetHash, guid)
            else:
                outDS['PSetHash'] = self.psetHash

        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // generate tier0 LFN bases for this workflow
        #//
        tier0LFN = self.makeTier0LFN()

        self.workflow.parameters['MergedLFNBase'] = tier0LFN
        self.workflow.parameters['UnmergedLFNBase'] = tier0LFN

        return self.workflow
Example 11
def createMergeJobWorkflow(procSpec,
                           isFastMerge=True,
                           doCleanUp=True,
                           littleE=False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job
    workflows that can be used to generate actual merge jobs 
    (as opposed to creating datasets like createMergeDatasetWorkflow)

    Returns a dictionary mapping input (i.e. MergeSensor-watched) dataset
    names to workflow spec instances.

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        #  //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox != None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        #  //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        #  //
        # // Output Dataset
        #//

        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'], dataset['ProcessedDataset'],
            outputModuleName)

        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']

        outputDataset["ApplicationName"] = \
                    cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = \
                    cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = \
                    cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
                      procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset

        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp == True:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        #  //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        results[inputDataset] = newWF

    return results
Example 12
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()

        #  //
        # // Input Dataset required for Tier0
        #//
    
        inputDataset = self.cmsRunNode.addInputDataset(
            self.inputDataset['Primary'],
            self.inputDataset['Processed']
            )
        inputDataset["DataTier"] = self.inputDataset['DataTier']
        for keyname in [
            'SplitType',
            'SplitSize',
            'OnlySites',
            'OnlyBlocks',
            'OnlyClosedBlocks',
            ]:
            if self.inputDataset[keyname] != None:
                self.workflow.parameters[keyname] = self.inputDataset[keyname]
                
        
        #  //
        # // Extract dataset info from cfg
        #//
        for outModName in self.configuration.outputModules.keys():
            moduleInstance = self.configuration.getOutputModule(outModName)
            #  //
            # // Data Tier same as input
            #//
            dataTier = self.inputDataset['DataTier']
            #  //
            # // Output primary dataset same as input primary
            #//
            primaryName = self.inputDataset['Primary']

            #  //
            # // Output processed dataset
            #//  (Note: we pass more info than is used here, since naming
            #  // conventions have a tendency to change in CMS)
            processedName = DatasetConventions.tier0ProcessedDatasetName(
                Version = self.cmsswVersion,
                InputPrimaryDataset = self.inputDataset['Primary'],
                InputProcessedDataset = self.inputDataset['Processed'],
                Label = self.label,
                Group = self.group,
                RequestId = self.requestId,
                Unmerged = self.unmergedDataset
                )
            
            dataTier = DatasetConventions.checkDataTier(dataTier)
            
            moduleInstance['primaryDataset'] = primaryName
            moduleInstance['processedDataset'] = processedName

            outDS = self.cmsRunNode.addOutputDataset(primaryName, 
                                                     processedName,
                                                     outModName)
            
            outDS['DataTier'] = dataTier
            outDS["ApplicationName"] = \
                                     self.cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outModName
            outDS["PhysicsGroup"] = self.group
            outDS["ApplicationFamily"] = outModName


            if self.inputDataset['IsUsed']:
                outDS['ParentDataset'] = self.inputDataset['DatasetName']
                
            if self.options['FakeHash']:
                guid = makeUUID()
                outDS['PSetHash'] = "hash=%s;guid=%s" % (self.psetHash,
                                                         guid)
            else:
                outDS['PSetHash'] = self.psetHash

            
        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // generate tier0 LFN bases for this workflow
        #//
        tier0LFN = self.makeTier0LFN()

        self.workflow.parameters['MergedLFNBase'] = tier0LFN
        self.workflow.parameters['UnmergedLFNBase'] = tier0LFN
        
        return self.workflow
    def makeWorkflowSpec(self, name, enableLazyDownload, configFile = None):
        """
        _makeWorkflowSpec_

        Create a workflow spec instance
        
        """
        self.workflow = WorkflowSpec()
        self.workflow.setWorkflowName(name)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(int(time.time()))
        self.workflow.parameters["WorkflowType"] = "Repack"
        self.workflow.parameters["RequestLabel"] = name
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        cmsRunNode = self.workflow.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        cmsRunNode.application["Executable"] = "cmsRun"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        # runtime express merge script
        cmsRunNode.scriptControls["PreExe"].append(
            "T0.ExpressMerger.RuntimeExpressMerger"
            )

        # build the configuration template for the workflow
        cmsRunNode.cfgInterface = self.buildConfiguration(enableLazyDownload, configFile)
        if cmsRunNode.cfgInterface == None:
            return None

        # generate Dataset information for workflow from cfgInterface
        for outMod,moduleInstance in cmsRunNode.cfgInterface.outputModules.items():
            primaryName = moduleInstance["primaryDataset"] 
            processedName = moduleInstance["processedDataset"] 

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outMod)
            
            outDS["DataTier"] = moduleInstance["dataTier"]
            outDS["ApplicationName"] = cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outMod
            outDS["PhysicsGroup"] = "Tier0"
            outDS["ApplicationFamily"] = outMod

            # generate just single LFN stub (all output is merged)
            # insert them into the output module and dataset info
            outDS["LFNBase"] = self.getLFN(moduleInstance, dataType = "express")
            moduleInstance["LFNBase"] = outDS["LFNBase"]
            moduleInstance["logicalFileName"] = os.path.join(
                outDS["LFNBase"], "%s.root" % outMod
                )

        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        # override stageout
        #
        # FIXME: This hardcodes the TFC LFN prefix !!!
##        if svcClass != None:
##            finder = NodeFinder("stageOut1")
##            self.workflow.payload.operate(finder)
##            node = finder.result

##            WorkflowTools.addStageOutOverride(node,
##                                              "rfcp",
##                                              "",
##                                              "srm-cms.cern.ch",
##                                              "rfio:///castor?svcClass=%s&path=/castor/cern.ch/cms" % svcClass)

        return self.workflow
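
The commented-out block above records the intended stage-out override; a
sketch of what enabling it might look like, using only names that appear in
that comment (the svcClass value is illustrative, and spec stands in for
self.workflow outside the method):

    svcClass = "t0export"                      # illustrative service class
    finder = NodeFinder("stageOut1")
    spec.payload.operate(finder)
    node = finder.result
    WorkflowTools.addStageOutOverride(
        node, "rfcp", "", "srm-cms.cern.ch",
        "rfio:///castor?svcClass=%s&path=/castor/cern.ch/cms" % svcClass)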