コード例 #1
0
ファイル: RequestIterator.py プロジェクト: giffels/PRODAGENT
class RequestIterator:
    """
    _RequestIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    Calling the instance writes one JobSpec file into a cache area
    below the working directory and advances the run counter by the
    workflow's RunIncrement parameter (1 when not set). The counter,
    events per job, site preference, pileup datasets and the JobSpec
    files still owned can be persisted with save() and restored with
    load().

    """
    def __init__(self, workflowSpecFile, workingDir):
        """
        ___init___

        Load the workflow spec and prepare the JobSpec cache directory.

        workflowSpecFile - workflow spec file handed to WorkflowSpec.load
        workingDir - directory below which "<workflowName>-Cache" is made

        If the workflow cannot be loaded the error is logged and
        construction stops early; generators and specCache are then
        never set on the instance.

        """
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.count = 0
        self.runIncrement = 1
        self.currentJob = None
        self.sitePref = None
        self.pileupDatasets = {}
        self.ownedJobSpecs = {}

        #  //
        # // Initially hard coded, should be extracted from Component Config
        #//
        self.eventsPerJob = 10

        self.workflowSpec = WorkflowSpec()
        try:
            self.workflowSpec.load(workflowSpecFile)
        except Exception:
            #  //
            # // Best effort: log and stop constructing. Only Exception
            #//  is caught so that interpreter-exit requests such as
            #  // KeyboardInterrupt/SystemExit are not swallowed here.
            logging.error("ERROR Loading Workflow: %s " % (workflowSpecFile))
            return

        runIncrement = self.workflowSpec.parameters.get("RunIncrement", None)
        if runIncrement is not None:
            self.runIncrement = int(runIncrement)

        self.generators = GeneratorMaker()
        self.workflowSpec.payload.operate(self.generators)

        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" % self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)

    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list

        Replaces self.pileupDatasets with the result of
        createPileupDatasets when the workflow declares at least one
        pileup dataset; otherwise leaves it untouched.

        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        Returns the result of getPileupSites when the workflow declares
        at least one pileup dataset, otherwise an empty list.

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites

    def __call__(self):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.

        The run counter is advanced by runIncrement only after the
        JobSpec has been created, so a failed creation does not
        consume a run number.

        """
        newJobSpec = self.createJobSpec()
        self.count += self.runIncrement
        return newJobSpec

    def createJobSpec(self):
        """
        _createJobSpec_

        Load the WorkflowSpec object and generate a JobSpec from it

        The job is named "<workflowName>-<count>", saved below the
        spec cache in a sub directory holding 1000 run numbers each,
        and recorded in ownedJobSpecs. Returns the path of the saved
        JobSpec file.

        """
        jobSpec = self.workflowSpec.createJobSpec()
        jobName = "%s-%s" % (
            self.workflowSpec.workflowName(),
            self.count,
            )
        self.currentJob = jobName
        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Processing")
        jobSpec.parameters['RunNumber'] = self.count

        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.operate(self.generateJobConfig)
        jobSpec.payload.operate(self.generateCmsGenConfig)

        # one cache sub directory per block of 1000 run numbers
        specCacheDir = os.path.join(
            self.specCache, str(self.count // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)
        self.ownedJobSpecs[jobName] = jobSpecFile

        #  //
        # // Add site pref if set
        #//
        if self.sitePref is not None:
            #
            #AF:  Allow site pref to be a comma separated list of sites,
            #     each one added in the Whitelist. Names are stripped
            #     and empty items skipped so "A, B" or a trailing comma
            #     do not whitelist " B" or "".
            for siteWhite in self.sitePref.split(","):
                siteWhite = siteWhite.strip()
                if siteWhite:
                    jobSpec.addWhitelistSite(siteWhite)

        jobSpec.save(jobSpecFile)
        return jobSpecFile

    def generateJobConfig(self, jobSpecNode):
        """
        _generateJobConfig_

        Operator to act on a JobSpecNode tree to convert the template
        config file into a JobSpecific Config File

        Nodes without a registered generator are left untouched. The
        event limit is passed as maxEventsWritten when the node's
        existing cfgInterface defines an output max events value,
        otherwise as maxEvents.

        """
        if jobSpecNode.name not in self.generators.keys():
            return
        generator = self.generators[jobSpecNode.name]

        useOutputMaxEv = False
        if jobSpecNode.cfgInterface is not None:
            outMaxEv = jobSpecNode.cfgInterface.maxEvents['output']
            useOutputMaxEv = outMaxEv is not None

        generatorArgs = {'firstRun': self.count}
        if useOutputMaxEv:
            generatorArgs['maxEventsWritten'] = self.eventsPerJob
        else:
            generatorArgs['maxEvents'] = self.eventsPerJob
        jobCfg = generator(self.currentJob, **generatorArgs)

        #  //
        # // Is there pileup for this node?
        #//
        if jobSpecNode.name in self.pileupDatasets:
            puDataset = self.pileupDatasets[jobSpecNode.name]
            logging.debug("Node: %s has a pileup dataset: %s" % (
                jobSpecNode.name,  puDataset.dataset,
                ))
            jobCfg.pileupFiles = puDataset.getPileupFiles()

        jobSpecNode.cfgInterface = jobCfg
        return

    def generateCmsGenConfig(self, jobSpecNode):
        """
        _generateCmsGenConfig_

        Process CmsGen type nodes to insert maxEvents and run numbers
        for cmsGen jobs

        Nodes of any other type are ignored.

        """
        if jobSpecNode.type != "CmsGen":
            return

        outputName = "%s-%s.root" % (self.currentJob, jobSpecNode.name)
        controls = jobSpecNode.applicationControls
        controls['firstRun'] = self.count
        controls['maxEvents'] = self.eventsPerJob
        controls['randomSeed'] = randomSeed()
        controls['fileName'] = outputName
        controls['logicalFileName'] = outputName
        return

    def removeSpec(self, jobSpecId):
        """
        _removeSpec_

        Remove a Spec file when it has been successfully injected

        Unknown ids are ignored. The bookkeeping entry is dropped even
        when the file has already disappeared from disk, so a stale
        entry is not kept (and persisted by save) forever.

        """
        if jobSpecId not in self.ownedJobSpecs:
            return

        logging.info("Removing JobSpec For: %s" % jobSpecId)
        filename = self.ownedJobSpecs[jobSpecId]
        if os.path.exists(filename):
            os.remove(filename)
        del self.ownedJobSpecs[jobSpecId]
        return

    def save(self, directory):
        """
        _save_

        Persist this objects state into an XML file and save
        it in the directory provided

        The file is named "<workflowName>-Persist.xml". It stores the
        run counter, events per job, site preference, the pileup
        datasets and the owned JobSpec files.

        """
        doc = IMProvDoc("RequestIterator")
        node = IMProvNode(self.workflowSpec.workflowName())
        doc.addNode(node)

        node.addNode(IMProvNode("Run", None, Value = str(self.count)))
        node.addNode(
            IMProvNode("EventsPerJob", None, Value = str(self.eventsPerJob))
            )
        # a sitePref of None is stored as the string "None"; load()
        # maps that back to None
        node.addNode(IMProvNode("SitePref", None, Value = str(self.sitePref)))

        pu = IMProvNode("Pileup")
        node.addNode(pu)
        for key, value in self.pileupDatasets.items():
            puNode = value.save()
            puNode.attrs['PayloadNode'] = key
            pu.addNode(puNode)

        specs = IMProvNode("JobSpecs")
        node.addNode(specs)
        for key, val in self.ownedJobSpecs.items():
            specs.addNode(IMProvNode("JobSpec", val, ID = key))

        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )
        # Serialise before opening: opening with 'w' truncates, so a
        # failure while building the XML must not wipe an existing file.
        content = doc.makeDOMDocument().toprettyxml()
        handle = open(fname, 'w')
        try:
            handle.write(content)
        finally:
            # always release the handle, even when the write fails
            handle.close()
        return

    def load(self, directory):
        """
        _load_

        Load this instance given the workflow and directory containing
        the persistency file

        If the file cannot be read the error is logged and the current
        state is left unchanged. Otherwise the run counter, events per
        job and site preference are replaced, and the stored pileup
        datasets and JobSpec files are merged into the current ones.

        """
        # local import: only needed to fetch the active exception in a
        # form that parses on both Python 2 and Python 3
        import sys

        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )

        try:
            node = loadIMProvFile(fname)
        except Exception:
            ex = sys.exc_info()[1]
            msg = "ERROR: Corrupted Persistency File:\n"
            msg += "  => %s\n" % fname
            msg += "Cannot be read:\n  => %s\n" % str(ex)
            logging.error(msg)
            return

        qbase = "/RequestIterator/%s" % self.workflowSpec.workflowName()

        runQ = IMProvQuery("%s/Run[attribute(\"Value\")]" % qbase)
        eventQ = IMProvQuery("%s/EventsPerJob[attribute(\"Value\")]" % qbase)
        siteQ = IMProvQuery("%s/SitePref[attribute(\"Value\")]" % qbase)

        # the last match is used for each query; all three values are
        # converted before any attribute is overwritten
        runVal = int(runQ(node)[-1])
        eventVal = int(eventQ(node)[-1])
        siteVal = str(siteQ(node)[-1])
        if siteVal.lower() == "none":
            siteVal = None

        self.count = runVal
        self.eventsPerJob = eventVal
        self.sitePref = siteVal

        puQ = IMProvQuery("%s/Pileup/*" % qbase)
        for puNode in puQ(node):
            payloadNode = str(puNode.attrs.get("PayloadNode"))
            # placeholder constructor arguments; the instance is then
            # populated from the node by its own load()
            puDataset = PileupDataset("dummy", 1)
            puDataset.load(puNode)
            self.pileupDatasets[payloadNode] = puDataset

        specQ = IMProvQuery("%s/JobSpecs/*" % qbase)
        for specNode in specQ(node):
            specId = str(specNode.attrs['ID'])
            specFile = str(specNode.chardata).strip()
            self.ownedJobSpecs[specId] = specFile

        return
コード例 #2
0
ファイル: DatasetIterator.py プロジェクト: giffels/PRODAGENT
class DatasetIterator:
    """
    _DatasetIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        """
        ___init___

        Load the workflow spec, read the splitting and restriction
        parameters from it and prepare the JobSpec cache directory.

        workflowSpecFile - workflow spec file handed to WorkflowSpec.load
        workingDir - directory below which "<workflowName>-Cache" is made

        Workflow parameters read here: OnlyClosedBlocks, SplitType
        (default "file"), SplitSize (default 1), OnlyBlocks, OnlySites
        and DBSURL. OnlyBlocks and OnlySites are comma separated lists;
        items are stripped and empty items dropped.

        Raises RuntimeError when neither getLocalDBSURL() nor the
        workflow's DBSURL parameter provides a DBS URL.

        """
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.currentJob = None
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpecFile)
        self.currentJobDef = None
        self.count = 0

        params = self.workflowSpec.parameters

        # only the (case-insensitive) string "true" switches this on;
        # a missing parameter compares as "" and leaves it off
        onlyClosed = str(params.get("OnlyClosedBlocks", "")).lower()
        self.onlyClosedBlocks = (onlyClosed == "true")

        self.ownedJobSpecs = {}
        self.allowedBlocks = []
        self.allowedSites = []
        self.dbsUrl = getLocalDBSURL()
        self.splitType = params.get("SplitType", "file").lower()
        self.splitSize = int(params.get("SplitSize", 1))

        self.generators = GeneratorMaker()
        self.generators(self.workflowSpec.payload)

        self.pileupDatasets = {}
        #  //
        # // Does the workflow contain a block restriction??
        #//
        blockRestriction = params.get("OnlyBlocks", None)
        if blockRestriction is not None:
            #  //
            # // restriction on blocks present, populate allowedBlocks list
            #//
            msg = "Block restriction provided in Workflow Spec:\n"
            msg += "%s\n" % blockRestriction
            logging.info(msg)
            self.allowedBlocks.extend(
                [block.strip() for block in blockRestriction.split(",")
                 if len(block.strip()) > 0])

        #  //
        # // Does the workflow contain a site restriction??
        #//
        siteRestriction = params.get("OnlySites", None)
        if siteRestriction is not None:
            #  //
            # // restriction on sites present, populate allowedSites list
            #//
            msg = "Site restriction provided in Workflow Spec:\n"
            msg += "%s\n" % siteRestriction
            logging.info(msg)
            self.allowedSites.extend(
                [site.strip() for site in siteRestriction.split(",")
                 if len(site.strip()) > 0])

        #  //
        # // Is the DBSURL contact information provided??
        #//  A value in the workflow overrides the local DBS URL.
        value = params.get("DBSURL", None)
        if value is not None:
            self.dbsUrl = value

        if self.dbsUrl is None:
            msg = "Error: No DBSURL available for dataset:\n"
            msg += "Cant get local DBSURL and one not provided with workflow"
            raise RuntimeError(msg)

        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" % self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
        
        
    def __call__(self, jobDef):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.
        The JobDef should be a JobDefinition with the input details
        including LFNs and event ranges etc.

        """
        newJobSpec = self.createJobSpec(jobDef)
        self.count += 1
        return newJobSpec


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_
        
        Are we dealing with pileup? If so pull in the file list
        
        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_
                                                                                                              
        Are we dealing with pileup? If so pull in the site list
                                                                                                              
        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites
                               

    def inputDataset(self):
        """
        _inputDataset_

        Extract the input Dataset from this workflow

        """
        topNode = self.workflowSpec.payload
        try:
            inputDataset = topNode._InputDatasets[-1]
        except StandardError, ex:
            msg = "Error extracting input dataset from Workflow:\n"
            msg += str(ex)
            logging.error(msg)
            return None

        return inputDataset.name()