Code example #1
File: T0ASTPlugin.py Project: giffels/PRODAGENT
    def createWorkflow(self, runNumber, primaryDataset,
                       processedDataset, dataTier):
        """
        _createWorkflow_

        Create a workflow for a given run and primary dataset.  If the workflow
        has been created previously, load it and use it.
        """
        jobCache = os.path.join(self.args["ComponentDir"], "T0ASTPlugin",
                                "Run%s" % runNumber)
        if not os.path.exists(jobCache):
            os.makedirs(jobCache)

        workflowSpecFileName = "DQMHarvest-Run%s-%s-workflow.xml" % (runNumber, primaryDataset)
        workflowSpecPath = os.path.join(jobCache, workflowSpecFileName)

        if os.path.exists(workflowSpecPath):
            msg = "Loading existing workflow for dataset: %s\n " % primaryDataset
            msg += " => %s\n" % workflowSpecPath
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowSpecPath)
            return (workflowSpec, workflowSpecPath)
            
        msg = "No workflow found for dataset: %s\n " % primaryDataset
        msg += "Looking up software version and generating workflow..."

        recoConfig = self.t0astWrapper.listRecoConfig(runNumber, primaryDataset)

        if not recoConfig["DO_RECO"]:
            logging.info("RECO disabled for dataset %s" % primaryDataset)
            return (None, None)

        globalTag = self.args.get("OverrideGlobalTag", None)
        if globalTag == None:
            globalTag = recoConfig["GLOBAL_TAG"]
            
        cmsswVersion = self.args.get("OverrideCMSSW", None)
        if cmsswVersion == None:
            cmsswVersion = recoConfig["CMSSW_VERSION"]

        datasetPath = "/%s/%s/%s" % (primaryDataset, processedDataset, dataTier)
        workflowSpec = createHarvestingWorkflow(datasetPath, self.site, 
                                                self.args["CmsPath"],
                                                self.args["ScramArch"],
                                                cmsswVersion, globalTag,
                                                configFile=self.args["ConfigFile"],
                                                DQMServer=self.args['DQMServer'],
                                                proxyLocation=self.args['proxyLocation'],
                                                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                                                doStageOut=self.args['DoStageOut'])
        
        
        workflowSpec.save(workflowSpecPath)
        msg = "Created Harvesting Workflow:\n %s" % workflowSpecPath
        logging.info(msg)
        self.publishWorkflow(workflowSpecPath, workflowSpec.workflowName())
        return (workflowSpec, workflowSpecPath)
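
A minimal usage sketch, assuming a fully configured T0ASTPlugin instance named plugin (its args, site and t0astWrapper attributes already set up by the component) and hypothetical run/dataset values:

# Hypothetical values; 'plugin' stands for a configured T0ASTPlugin instance
workflowSpec, specPath = plugin.createWorkflow("123456", "MinimumBias",
                                               "BeamCommissioning09-PromptReco-v1",
                                               "RECO")
if workflowSpec is None:
    # RECO is disabled for this primary dataset in T0AST; nothing to harvest
    pass
else:
    print "Harvesting workflow spec written to", specPath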
Code example #2
def createJobSpec(jobSpecId,
                  workflowSpecFile,
                  filename,
                  runNumber,
                  eventCount,
                  firstEvent=None,
                  saveString=False,
                  loadString=True):

    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)

    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
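
A hedged invocation sketch for this helper, with placeholder identifiers and paths (loadString=False makes it read the workflow spec from a file on disk rather than from an XML string):

# Placeholder identifiers and paths, for illustration only
createJobSpec(jobSpecId="MyWorkflow-Run1-JobSpec",
              workflowSpecFile="/path/to/MyWorkflow-workflow.xml",
              filename="/path/to/MyWorkflow-Run1-JobSpec.xml",
              runNumber=1,
              eventCount=100,
              firstEvent=1,
              saveString=False,
              loadString=False)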
Code example #3
File: EventJobSpec.py Project: PerilousApricot/CRAB2
def createJobSpec(jobSpecId, workflowSpecFile, filename, runNumber, eventCount,
                  firstEvent=None, saveString=False, loadString=True):

    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (
        workflowSpec.workflowName(),
        runNumber
            )


    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
Code example #4
File: DBSPlugin.py Project: giffels/PRODAGENT
    def __call__(self, collectPayload):
        """
        _operator(collectPayload)_

        Given the dataset and run in the payload, callout to DBS
        to find the files to be harvested

        """
        msg = "DBSPlugin invoked for %s" % str(collectPayload)
        logging.info(msg)


        site = self.args.get("Site", "srm.cern.ch")

        baseCache = os.path.join(self.args['ComponentDir'],
                                 "DBSPlugin")
        if not os.path.exists(baseCache):
            os.makedirs(baseCache)

        datasetCache = os.path.join(baseCache,
                                    collectPayload['PrimaryDataset'],
                                    collectPayload['ProcessedDataset'],
                                    collectPayload['DataTier'])

        if not os.path.exists(datasetCache):
            os.makedirs(datasetCache)

        workflowFile = os.path.join(
            datasetCache,
            "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
            )
        if not os.path.exists(workflowFile):
            msg = "No workflow found for dataset: %s\n " % (
                collectPayload.datasetPath(),)
            msg += "Looking up software version and generating workflow..."

            if self.args.get("OverrideGlobalTag", None) == None:
                globalTag = findGlobalTagForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
            else:
                globalTag = self.args['OverrideGlobalTag']


            if self.args.get("OverrideCMSSW", None) != None:
                cmsswVersion = self.args['OverrideCMSSW']
                msg = "Using Override for CMSSW Version %s" % (
                    self.args['OverrideCMSSW'],)
                logging.info(msg)
            else:
                cmsswVersion = findVersionForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
                msg = "Found CMSSW Version for dataset/run\n"
                msg += " Dataset %s Run %s\n" % (collectPayload.datasetPath(),
                                                 collectPayload['RunNumber'])
                msg += " CMSSW Version = %s\n " % cmsswVersion
                logging.info(msg)

            workflowSpec = createHarvestingWorkflow(
                collectPayload.datasetPath(),
                site,
                self.args['CmsPath'],
                self.args['ScramArch'],
                cmsswVersion,
                globalTag,
                configFile=self.args['ConfigFile'],
                DQMServer=self.args['DQMServer'],
                proxyLocation=self.args['proxyLocation'],
                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                doStageOut=self.args['DoStageOut'])

            workflowSpec.save(workflowFile)
            msg = "Created Harvesting Workflow:\n %s" % workflowFile
            logging.info(msg)
            self.publishWorkflow(workflowFile, workflowSpec.workflowName())
        else:
            msg = "Loading existing workflow for dataset: %s\n " % (
                collectPayload.datasetPath(),)
            msg += " => %s\n" % workflowFile
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowFile)




        job = {}
        jobSpec = workflowSpec.createJobSpec()
        jobName = "%s-%s-%s" % (
            workflowSpec.workflowName(),
            collectPayload['RunNumber'],
            time.strftime("%H-%M-%S-%d-%m-%y")
            )

        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Harvesting")
        jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']
        jobSpec.addWhitelistSite(site)
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))



        jobSpec.payload.cfgInterface.inputFiles.extend(
            listFilesInRun(
            DBSReader(self.dbsUrl),
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'],
            collectPayload['RunNumber'])
            )

        specCacheDir =  os.path.join(
            datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)

        jobSpec.save(jobSpecFile)


        job["JobSpecId"] = jobName
        job["JobSpecFile"] = jobSpecFile
        job['JobType'] = "Harvesting"
        job["WorkflowSpecId"] = workflowSpec.workflowName(),
        job["WorkflowPriority"] = 10
        job["Sites"] = [site]
        job["Run"] = collectPayload['RunNumber']
        job['WorkflowSpecFile'] = workflowFile



        msg = "Harvesting Job Created for\n"
        msg += " => Run:       %s\n" % collectPayload['RunNumber']
        msg += " => Primary:   %s\n" % collectPayload['PrimaryDataset']
        msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
        msg += " => Tier:      %s\n" % collectPayload['DataTier']
        msg += " => Workflow:  %s\n" % job['WorkflowSpecId']
        msg += " => Job:       %s\n" % job['JobSpecId']
        msg += " => Site:      %s\n" % job['Sites']
        logging.info(msg)

        return [job]
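
The plugin returns a list containing a single job dictionary; a sketch of how a caller might consume it (plugin is assumed to be a configured DBSPlugin instance, payload a prepared collect payload, and submitJob a hypothetical submission hook):

for job in plugin(payload):
    # Each entry describes one harvesting job and points at its saved JobSpec
    print job["JobSpecId"], "->", job["JobSpecFile"]
    # submitJob(job)   # hypothetical: hand the job over to the submission layer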
Code example #5
File: removeWorkflow.py Project: giffels/PRODAGENT
from JobQueue.JobQueueDB import JobQueueDB
import ProdAgent.WorkflowEntities.Aux as WEAux
import ProdAgent.WorkflowEntities.Workflow as WEWorkflow

workflow = sys.argv[1]
workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)

#  //
# // Clean out job cache
#//
config = loadProdAgentConfiguration()
compCfg = config.getConfig("JobCreator")
creatorCache = os.path.expandvars(compCfg['ComponentDir'])

workflowCache = os.path.join(creatorCache, workflowSpec.workflowName())
if os.path.exists(workflowCache):
    os.system("/bin/rm -rf %s" % workflowCache)



Session.set_database(dbConfig)
Session.connect()
Session.start_transaction()

#  //
# // clean out queue
#//
jobQ = JobQueueDB()
jobQ.removeWorkflow(workflowSpec.workflowName())
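
A side note on the cache cleanup above: shelling out to /bin/rm -rf works, but the same cleanup can be done in pure Python; a small equivalent sketch:

import shutil

if os.path.exists(workflowCache):
    # Same effect as the os.system("/bin/rm -rf ...") call, without spawning a shell
    shutil.rmtree(workflowCache, ignore_errors=True)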
Code example #6
File: RequestIterator.py Project: giffels/PRODAGENT
class RequestIterator:
    """
    _RequestIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.count = 0
        self.runIncrement = 1
        self.currentJob = None
        self.sitePref = None
        self.pileupDatasets = {}
        self.ownedJobSpecs = {}
        
        #  //
        # // Initially hard coded, should be extracted from Component Config
        #//
        self.eventsPerJob = 10 
        
        self.workflowSpec = WorkflowSpec()
        try:
            self.workflowSpec.load(workflowSpecFile)
        except:
            logging.error("ERROR Loading Workflow: %s " % (workflowSpecFile))
            return

        if self.workflowSpec.parameters.get("RunIncrement", None) != None:
            self.runIncrement = int(
                self.workflowSpec.parameters['RunIncrement']
                )

    
        self.generators = GeneratorMaker()
        self.workflowSpec.payload.operate(self.generators)
        
        
        
        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" %self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
        


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list
        
        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites


    def __call__(self):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.

        """
        newJobSpec = self.createJobSpec()
        self.count += self.runIncrement
        return newJobSpec


    def createJobSpec(self):
        """
        _createJobSpec_

        Load the WorkflowSpec object and generate a JobSpec from it

        """
        
        jobSpec = self.workflowSpec.createJobSpec()
        jobName = "%s-%s" % (
            self.workflowSpec.workflowName(),
            self.count,
            )
        self.currentJob = jobName
        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Processing")
        jobSpec.parameters['RunNumber'] = self.count

        
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.operate(self.generateJobConfig)
        jobSpec.payload.operate(self.generateCmsGenConfig)
        specCacheDir =  os.path.join(
            self.specCache, str(self.count // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)
        self.ownedJobSpecs[jobName] = jobSpecFile

        
        #  //
        # // Add site pref if set
        #//
        if self.sitePref != None:
            # AF: Allow site pref to be a comma separated list of sites,
            #     each one added in the Whitelist (instead of a single
            #     jobSpec.addWhitelistSite(self.sitePref) call)
            for siteWhite in self.sitePref.split(","):
                jobSpec.addWhitelistSite(siteWhite)

        jobSpec.save(jobSpecFile)
        return jobSpecFile
        
        
    def generateJobConfig(self, jobSpecNode):
        """
        _generateJobConfig_
        
        Operator to act on a JobSpecNode tree to convert the template
        config file into a JobSpecific Config File
                
        """
        if jobSpecNode.name not in self.generators.keys():
            return
        generator = self.generators[jobSpecNode.name]

        useOutputMaxEv = False
        if jobSpecNode.cfgInterface != None:
            outMaxEv = jobSpecNode.cfgInterface.maxEvents['output']
            if outMaxEv != None:
                useOutputMaxEv = True

        if useOutputMaxEv:
            jobCfg = generator(self.currentJob,
                               maxEventsWritten = self.eventsPerJob,
                               firstRun = self.count)
        else:
            jobCfg = generator(self.currentJob,
                               maxEvents = self.eventsPerJob,
                               firstRun = self.count)
        
        #  //
        # // Is there pileup for this node?
        #//
        if self.pileupDatasets.has_key(jobSpecNode.name):
            puDataset = self.pileupDatasets[jobSpecNode.name]
            logging.debug("Node: %s has a pileup dataset: %s" % (
                jobSpecNode.name,  puDataset.dataset,
                ))
            fileList = puDataset.getPileupFiles()
            jobCfg.pileupFiles = fileList

            
            
        
        jobSpecNode.cfgInterface = jobCfg
        return


    def generateCmsGenConfig(self, jobSpecNode):
        """
        _generateCmsGenConfig_

        Process CmsGen type nodes to insert maxEvents and run numbers
        for cmsGen jobs

        """
        if jobSpecNode.type != "CmsGen":
            return

        jobSpecNode.applicationControls['firstRun'] = self.count
        jobSpecNode.applicationControls['maxEvents'] = self.eventsPerJob
        jobSpecNode.applicationControls['randomSeed'] = randomSeed()
        jobSpecNode.applicationControls['fileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        jobSpecNode.applicationControls['logicalFileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        return
        

    def removeSpec(self, jobSpecId):
        """
        _removeSpec_

        Remove a Spec file when it has been successfully injected

        """
        if jobSpecId not in self.ownedJobSpecs.keys():
            return

        logging.info("Removing JobSpec For: %s" % jobSpecId)
        filename = self.ownedJobSpecs[jobSpecId]
        if os.path.exists(filename):
            os.remove(filename)
            del self.ownedJobSpecs[jobSpecId]
        return

        


    def save(self, directory):
        """
        _save_

        Persist this objects state into an XML file and save
        it in the directory provided

        """
        doc = IMProvDoc("RequestIterator")
        node = IMProvNode(self.workflowSpec.workflowName())
        doc.addNode(node)

        node.addNode(IMProvNode("Run", None, Value = str(self.count)))
        node.addNode(
            IMProvNode("EventsPerJob", None, Value = str(self.eventsPerJob))
            )
        node.addNode(IMProvNode("SitePref", None, Value = str(self.sitePref)))

        pu = IMProvNode("Pileup")
        node.addNode(pu)
        for key, value in self.pileupDatasets.items():
            puNode = value.save()
            puNode.attrs['PayloadNode'] = key
            pu.addNode(puNode)

        specs = IMProvNode("JobSpecs")
        node.addNode(specs)
        for key, val in self.ownedJobSpecs.items():
            specs.addNode(IMProvNode("JobSpec", val, ID = key))
            
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )
        handle = open(fname, 'w')
        handle.write(doc.makeDOMDocument().toprettyxml())
        handle.close()
        return


    def load(self, directory):
        """
        _load_

        Load this instance given the workflow and directory containing
        the persistency file

        """
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )

        try:
            node = loadIMProvFile(fname)
        except Exception, ex:
            msg = "ERROR: Corrupted Persistency File:\n"
            msg += "  => %s\n" % fname
            msg += "Cannot be read:\n  => %s\n" % str(ex)
            logging.error(msg)
            return
        

        qbase = "/RequestIterator/%s" % self.workflowSpec.workflowName()

        runQ = IMProvQuery("%s/Run[attribute(\"Value\")]" % qbase)
        eventQ = IMProvQuery("%s/EventsPerJob[attribute(\"Value\")]" % qbase)
        siteQ = IMProvQuery("%s/SitePref[attribute(\"Value\")]" % qbase)

        runVal = int(runQ(node)[-1])
        eventVal = int(eventQ(node)[-1])
        siteVal = str(siteQ(node)[-1])
        if siteVal.lower() == "none":
            siteVal = None

        self.count = runVal
        self.eventsPerJob = eventVal
        self.sitePref = siteVal
        
        puQ = IMProvQuery("%s/Pileup/*" % qbase)
        puNodes = puQ(node)
        for puNode in puNodes:
            payloadNode = str(puNode.attrs.get("PayloadNode"))
            puDataset = PileupDataset("dummy", 1)
            puDataset.load(puNode)
            self.pileupDatasets[payloadNode] = puDataset

        specQ = IMProvQuery("%s/JobSpecs/*" % qbase)
        specNodes = specQ(node)
        for specNode in specNodes:
            specId = str(specNode.attrs['ID'])
            specFile = str(specNode.chardata).strip()
            self.ownedJobSpecs[specId] = specFile

        return
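
A hedged sketch of how a component might drive RequestIterator (paths, the site list and the job count are placeholders; only the methods and attributes shown above are used):

# Placeholder paths and values, for illustration only
iterator = RequestIterator("/path/to/MyWorkflow-workflow.xml", "/path/to/workdir")
iterator.sitePref = "SiteA,SiteB"     # optional comma-separated whitelist
iterator.loadPileupDatasets()

specFiles = []
for i in range(5):
    # Each call writes a JobSpec XML into the spec cache and returns its path
    specFiles.append(iterator())

iterator.save("/path/to/workdir")     # persist count / sitePref / pileup state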
Code example #7
File: DatasetIterator.py Project: giffels/PRODAGENT
class DatasetIterator:
    """
    _DatasetIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.currentJob = None
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpecFile)
        self.currentJobDef = None
        self.count = 0
        self.onlyClosedBlocks = False
        if  self.workflowSpec.parameters.has_key("OnlyClosedBlocks"):
            onlyClosed =  str(
                self.workflowSpec.parameters["OnlyClosedBlocks"]).lower()
            if onlyClosed == "true":
                self.onlyClosedBlocks = True
        self.ownedJobSpecs = {}
        self.allowedBlocks = []
        self.allowedSites = []
        self.dbsUrl = getLocalDBSURL()
        self.splitType = \
                self.workflowSpec.parameters.get("SplitType", "file").lower()
        self.splitSize = int(self.workflowSpec.parameters.get("SplitSize", 1))

        self.generators = GeneratorMaker()
        self.generators(self.workflowSpec.payload)

        self.pileupDatasets = {}
        #  //
        # // Does the workflow contain a block restriction??
        #//
        blockRestriction = \
             self.workflowSpec.parameters.get("OnlyBlocks", None)
        if blockRestriction != None:
            #  //
            # // restriction on blocks present, populate allowedBlocks list
            #//
            msg = "Block restriction provided in Workflow Spec:\n"
            msg += "%s\n" % blockRestriction
            blockList = blockRestriction.split(",")
            for block in blockList:
                if len(block.strip() ) > 0:
                    self.allowedBlocks.append(block.strip())

        #  //
        # // Does the workflow contain a site restriction??
        #//
        siteRestriction = \
           self.workflowSpec.parameters.get("OnlySites", None)          
        if siteRestriction != None:
            #  //
            # // restriction on sites present, populate allowedSites list
            #//
            msg = "Site restriction provided in Workflow Spec:\n"
            msg += "%s\n" % siteRestriction
            siteList = siteRestriction.split(",")
            for site in siteList:
                if len(site.strip() ) > 0:
                    self.allowedSites.append(site.strip())

        #  //
        # // Is the DBSURL contact information provided??
        #//

        value = self.workflowSpec.parameters.get("DBSURL", None)
        if value != None:
            self.dbsUrl = value

        if self.dbsUrl == None:
            msg = "Error: No DBSURL available for dataset:\n"
            msg += "Cant get local DBSURL and one not provided with workflow"
            raise RuntimeError, msg
            
        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" %self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
        
        
    def __call__(self, jobDef):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.
        The JobDef should be a JobDefinition with the input details
        including LFNs and event ranges etc.

        """
        newJobSpec = self.createJobSpec(jobDef)
        self.count += 1
        return newJobSpec


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_
        
        Are we dealing with pileup? If so pull in the file list
        
        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites


    def inputDataset(self):
        """
        _inputDataset_

        Extract the input Dataset from this workflow

        """
        topNode = self.workflowSpec.payload
        try:
            inputDataset = topNode._InputDatasets[-1]
        except StandardError, ex:
            msg = "Error extracting input dataset from Workflow:\n"
            msg += str(ex)
            logging.error(msg)
            return None

        return inputDataset.name()
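
The DatasetIterator excerpt ends before its createJobSpec(jobDef) method, so only the visible pieces are sketched here (paths are placeholders; jobDef would be a JobDefinition produced by the dataset-splitting machinery that is not shown):

# Placeholder paths, for illustration only
iterator = DatasetIterator("/path/to/MyWorkflow-workflow.xml", "/path/to/workdir")
iterator.loadPileupDatasets()

print iterator.inputDataset()        # e.g. "/Primary/Processed/TIER", or None on error
print iterator.loadPileupSites()     # [] when the workflow defines no pileup

# jobSpecFile = iterator(jobDef)     # jobDef comes from elsewhere (not in this excerpt)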
Code example #8
    print usage
    sys.exit(1)
if nevts == None:
    print "--nevts option not provided \n The default of the workflow will be used."


## check workflow existing on disk 
workflow=os.path.expandvars(os.path.expanduser(workflow))
if not os.path.exists(workflow):
    print "Workflow not found: %s" % workflow
    sys.exit(1)

## get the workflow name
workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)
workflowName = workflowSpec.workflowName()
workflowBase=os.path.basename(workflow)

## use MessageService
ms = MessageService()
## register message service instance as "Test"
ms.registerAs("TestSkim")

## Debug level
ms.publish("DatasetInjector:StartDebug","none")
ms.publish("JobCreator:StartDebug","none")
ms.publish("JobSubmitter:StartDebug","none")
ms.publish("DBSInterface:StartDebug","none")
ms.publish("ErrorHandler:StartDebug","none")
ms.publish("TrackingComponent:StartDebug","none")
ms.commit()
Code example #9
File: RelValPlugin.py Project: giffels/PRODAGENT
    def __call__(self, collectPayload):
        """
        _operator(collectPayload)_

        Given the dataset in the payload, callout to DBS
        to find the files to be harvested

        """
        msg = "RelValPlugin invoked for %s" % str(collectPayload)
        logging.info(msg)

        if collectPayload.get('Scenario', None) is None:
            msg = "RelValPlugin: Payload should provide a scenario."
            raise RuntimeError, msg    

        site = self.args.get("Site", "srm-cms.cern.ch")

        baseCache = os.path.join(self.args['ComponentDir'],
                                 "RelValPlugin")
        if not os.path.exists(baseCache):
            os.makedirs(baseCache)

        datasetCache = os.path.join(baseCache,
                                    collectPayload['PrimaryDataset'],
                                    collectPayload['ProcessedDataset'],
                                    collectPayload['DataTier'])

        if not os.path.exists(datasetCache):
            os.makedirs(datasetCache)

        workflowFile = os.path.join(
            datasetCache,
            "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
            )

        if not os.path.exists(workflowFile):
            msg = "No workflow found for dataset: %s\n " % (
                collectPayload.datasetPath())
            msg += "Looking up software version and generating workflow..."
            logging.info(msg)

            # Override Global Tag?
            if self.args.get("OverrideGlobalTag", None) is not None:
                globalTag = self.args['OverrideGlobalTag']
                msg = "Using Overrride for Global: %s" % globalTag
                logging.info(msg)
            # Global Tag provided in the payload?
            elif collectPayload.get('GlobalTag', None) is not None:
                globalTag = collectPayload['GlobalTag']
                msg = "Global tag found in payload: %s" % globalTag
                logging.info(msg)
            # Look up in DBS for Global Tag, use fallback GT as last resort
            else:
                globalTag = findGlobalTagForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'])

            # Override CMSSW Version
            if self.args.get("OverrideCMSSW", None) is not None:
                cmsswVersion = self.args['OverrideCMSSW']
                msg = "Using Override for CMSSW Version %s" % (
                    self.args['OverrideCMSSW'],)
                logging.info(msg)
            # CMSSW Version provided in the payload?
            elif collectPayload.get('CMSSWVersion', None) is not None:
                cmsswVersion = collectPayload['CMSSWVersion']
                msg = "CMSSW Version found in payload: %s" % cmsswVersion
                logging.info(msg)
            else:
                cmsswVersion = findVersionForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
                msg = "CMSSW Version for dataset/run\n"
                msg += " Dataset %s\n" % collectPayload.datasetPath()
                msg += " CMSSW Version = %s\n " % cmsswVersion
                logging.info(msg)

            workflowSpec = createHarvestingWorkflow(
                collectPayload.datasetPath(),
                site,
                self.args['CmsPath'],
                self.args['ScramArch'],
                cmsswVersion,
                globalTag,
                configFile=self.args['ConfigFile'],
                DQMServer=self.args['DQMServer'],
                proxyLocation=self.args['proxyLocation'],
                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                doStageOut=self.args['DoStageOut'])
            
            workflowSpec.save(workflowFile)
            msg = "Created Harvesting Workflow:\n %s" % workflowFile
            msg += "\nThe following parameters were used:\n"
            msg += "DQMserver     ==> %s\n" % (self.args['DQMServer'])
            msg += "proxyLocation ==> %s\n" % (self.args['proxyLocation'])
            msg += "Stage Out     ==> %s\n" % (self.args['DoStageOut'])
            msg += "DQMCopyToCERN ==> %s\n" % (self.args['DQMCopyToCERN'])
            logging.info(msg)
            self.publishWorkflow(workflowFile, workflowSpec.workflowName())
        else:
            msg = "Loading existing workflow for dataset: %s\n " % (
                collectPayload.datasetPath())
            msg += " => %s\n" % workflowFile
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowFile)

        job = {}
        jobSpec = workflowSpec.createJobSpec()
        jobName = "%s-%s-%s" % (
            workflowSpec.workflowName(),
            collectPayload['RunNumber'],
            time.strftime("%H-%M-%S-%d-%m-%y")
            )

        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Harvesting")

        # Adding specific parameters to the JobSpec
        jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']  # How should we manage the run numbers?
        jobSpec.parameters['Scenario'] = collectPayload['Scenario']
        if collectPayload.get('RefHistKey', None) is not None:
            jobSpec.parameters['RefHistKey'] = collectPayload['RefHistKey']

        jobSpec.addWhitelistSite(site)
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.cfgInterface.inputFiles.extend(
            getLFNForDataset(self.dbsUrl,
                             collectPayload['PrimaryDataset'],
                             collectPayload['ProcessedDataset'],
                             collectPayload['DataTier'],
                             run=collectPayload['RunNumber']))

        specCacheDir =  os.path.join(
            datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)

        jobSpec.save(jobSpecFile)

        job["JobSpecId"] = jobName
        job["JobSpecFile"] = jobSpecFile
        job['JobType'] = "Harvesting"
        job["WorkflowSpecId"] = workflowSpec.workflowName(),
        job["WorkflowPriority"] = 10
        job["Sites"] = [site]
        job["Run"] = collectPayload['RunNumber']
        job['WorkflowSpecFile'] = workflowFile

        msg = "Harvesting Job Created for\n"
        msg += " => Run:       %s\n" % collectPayload['RunNumber']
        msg += " => Primary:   %s\n" % collectPayload['PrimaryDataset']
        msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
        msg += " => Tier:      %s\n" % collectPayload['DataTier']
        msg += " => Workflow:  %s\n" % job['WorkflowSpecId']
        msg += " => Job:       %s\n" % job['JobSpecId']
        msg += " => Site:      %s\n" % job['Sites']
        logging.info(msg)

        return [job]
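
RelValPlugin insists on a 'Scenario' entry and optionally honours 'GlobalTag', 'CMSSWVersion' and 'RefHistKey'; a hedged sketch of a payload, using a plain dict stand-in with a datasetPath() helper (the real collect payload class is not part of this excerpt, and all values below are hypothetical):

class FakeCollectPayload(dict):
    """Illustrative stand-in for the real collect payload object."""
    def datasetPath(self):
        return "/%s/%s/%s" % (self['PrimaryDataset'],
                              self['ProcessedDataset'],
                              self['DataTier'])

# Hypothetical dataset, run and global tag values
payload = FakeCollectPayload(PrimaryDataset="RelValMinBias",
                             ProcessedDataset="CMSSW_X_Y_Z-MC_TAG-v1",
                             DataTier="GEN-SIM-RECO",
                             RunNumber=1,
                             Scenario="pp",
                             GlobalTag="MC_TAG::All")

# jobs = plugin(payload)   # would build, save and return the harvesting job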
Code example #10
for run in range(firstrun, lastrun+1):

    jobCreator.setRun(run)

    # if this is needed we should create
    # a JobCreator instance per run
    #workflowSpec.setWorkflowRunNumber(run)

    jobList = []
    for lumi in range(1,lumiperrun+1):

        jobCreator.setLumi(lumi)
        jobCreator.setEventsPerJob(eventsperjob)
        jobCreator.setFirstEvent(1+lumi*eventsperjob)

        jobName = "%s-%s-%s" % (workflowSpec.workflowName(),
                                run, lumi)

        jobSpec = workflowSpec.createJobSpec()

        jobSpecDir =  os.path.join("/data/hufnagel/parepack/StreamerMCRunning",
                                   str(run // 1000).zfill(4))
        if not os.path.exists(jobSpecDir):
            os.makedirs(jobSpecDir)

        jobSpecFileName = jobName + "-jobspec.xml"
        jobSpecFile = os.path.join(jobSpecDir, jobSpecFileName) 

        jobSpec.setJobName(jobName)

        # used for thresholds