Code Example #1
File: WatchedDatasets.py    Project: giffels/PRODAGENT
    def add(self, workflowFile):
        """
        _add_
        
        Add a dataset to the list of watched datasets.
        
        Arguments:
            
          workflowFile -- the workflow specification file
        
        Return:
            
          the datasetId

        """

        # read the WorkflowSpecFile
        try:
            wfile = WorkflowSpec()
            wfile.load(workflowFile)

        # wrong dataset file
        except Exception, msg:
            raise InvalidDataset, \
                  "Error loading workflow specifications from %s" % workflowFile
Code Example #2
File: RuntimePSetPrep.py    Project: giffels/PRODAGENT
class JobSpecExpander:

    def __init__(self, jobSpecFile):
        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpecFile)
        self.taskState = TaskState(os.getcwd())
        self.taskState.loadRunResDB()
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])
        
        self.config = self.taskState.configurationDict()

        finder = NodeFinder(self.taskState.taskName())
        self.jobSpec.payload.operate(finder)
        self.jobSpecNode = finder.result

        wffinder = NodeFinder(self.taskState.taskName())
        self.workflowSpec.payload.operate(wffinder)
        self.workflowNode = wffinder.result

        if self.jobSpecNode.jobType != "Merge":
            if self.config.has_key('Configuration'):
                try:
                    self.createPSet()
                except Exception, ex:
                    msg = "Unable to generate cmsRun Config from JobSpec:\n"
                    msg += str(ex)
                    print msg
                    badfile = open("exit.status", 'w')
                    badfile.write("10040")
                    badfile.close()
        else:
Code Example #3
def getCMSSoft(work,reverse=False):
    """
    opens the workflowfile and gets the CMSSoft version
    if reverse, returns a map between CMSSoft version and real workflowname
    """

    new_work={}
    workflowSpec = WorkflowSpec()
    for fil in work:
        try:
            workflowSpec.load(fil)
            cmssw=workflowSpec.payload.application['Version']
            name=workflowSpec.parameters['WorkflowName']
            if reverse:
                if not new_work.has_key(cmssw):
                    new_work[cmssw]=[]
                # append outside the has_key check so every workflow name is kept
                new_work[cmssw].append(name)
            else:
                new_work[name]=cmssw
        except:
            """
            something went wrong

            """
            msg="WorkflowConstraints getCMSSoft: something went wrong while handling file "+fil
            print(msg)

    return new_work
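
A minimal usage sketch for the helper above; the workflow file paths are invented and the function is assumed to be in scope (it lives in the WorkflowConstraints module mentioned in its error message).

files = ["/path/to/WorkflowA-Workflow.xml", "/path/to/WorkflowB-Workflow.xml"]

# workflow name -> CMSSW version
print getCMSSoft(files)

# CMSSW version -> list of workflow names
print getCMSSoft(files, reverse=True)
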
Code Example #4
File: T0ASTPlugin.py    Project: giffels/PRODAGENT
    def createWorkflow(self, runNumber, primaryDataset,
                       processedDataset, dataTier):
        """
        _createWorkflow_

        Create a workflow for a given run and primary dataset.  If the workflow
        has been created previously, load it and use it.
        """
        jobCache = os.path.join(self.args["ComponentDir"], "T0ASTPlugin",
                                "Run" + runNumber)
        if not os.path.exists(jobCache):
            os.makedirs(jobCache)

        workflowSpecFileName = "DQMHarvest-Run%s-%s-workflow.xml" % (runNumber, primaryDataset)
        workflowSpecPath = os.path.join(jobCache, workflowSpecFileName)

        if os.path.exists(workflowSpecPath):
            msg = "Loading existing workflow for dataset: %s\n " % primaryDataset
            msg += " => %s\n" % workflowSpecPath
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowSpecPath)
            return (workflowSpec, workflowSpecPath)
            
        msg = "No workflow found for dataset: %s\n " % primaryDataset
        msg += "Looking up software version and generating workflow..."

        recoConfig = self.t0astWrapper.listRecoConfig(runNumber, primaryDataset)

        if not recoConfig["DO_RECO"]:
            logging.info("RECO disabled for dataset %s" % primaryDataset)
            return (None, None)

        globalTag = self.args.get("OverrideGlobalTag", None)
        if globalTag == None:
            globalTag = recoConfig["GLOBAL_TAG"]
            
        cmsswVersion = self.args.get("OverrideCMSSW", None)
        if cmsswVersion == None:
            cmsswVersion = recoConfig["CMSSW_VERSION"]

        datasetPath = "/%s/%s/%s" % (primaryDataset, processedDataset, dataTier)
        workflowSpec = createHarvestingWorkflow(datasetPath, self.site, 
                                                self.args["CmsPath"],
                                                self.args["ScramArch"],
                                                cmsswVersion, globalTag,
                                                configFile=self.args["ConfigFile"],
                                                DQMServer=self.args['DQMServer'],
                                                proxyLocation=self.args['proxyLocation'],
                                                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                                                doStageOut=self.args['DoStageOut'])
        
        
        workflowSpec.save(workflowSpecPath)
        msg = "Created Harvesting Workflow:\n %s" % workflowSpecPath
        logging.info(msg)
        self.publishWorkflow(workflowSpecPath, workflowSpec.workflowName())
        return (workflowSpec, workflowSpecPath)
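
A hedged sketch of calling this method; 'plugin' stands for a fully configured T0ASTPlugin instance and the dataset names are made up. Since runNumber is concatenated into the cache path as a string above, it is passed as a string here.

spec, specPath = plugin.createWorkflow("123456",                # runNumber
                                       "ExamplePrimary",        # primaryDataset
                                       "Example-Processed-v1",  # processedDataset
                                       "RECO")                  # dataTier
if spec is not None:
    print "Harvesting workflow available at", specPath
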
Code Example #5
File: RelValSpecMgr.py    Project: giffels/PRODAGENT
    def makeJobs(self, testInstance):
        """
        _makeJobs_

        Create Job Specs for the test instance provided

        """
        logging.info("Creating Jobs for test %s at site %s" % (
            testInstance['Name'],
            testInstance['Site'])
                     )
        testName = testInstance['WorkflowSpecId']
        specInstance = WorkflowSpec()
        specInstance.load(testInstance['WorkflowSpecFile'])

        if testInstance['InputDataset'] == None:
            initialRun = self.jobCounts.get(testInstance['Name'], 1)
            factory = RequestJobFactory(
                specInstance,
                testInstance['WorkingDir'],
                testInstance['TotalEvents'],
                InitialRun = initialRun,
                EventsPerJob = testInstance['EventsPerJob'],
                Sites = [testInstance['Site']])

            jobsList = factory()
            self.jobCounts[testInstance['Name']] += len(jobsList)
        else:

            factory = DatasetJobFactory(
                specInstance,
                testInstance['WorkingDir'],
                specInstance.parameters['DBSURL'],
                )

            jobsList = factory()
            self.jobCounts[testInstance['Name']] += len(jobsList)


        msg = "Created %s jobs:\n" % len(jobsList)

        for job in jobsList:
            jobSpecFile = job['JobSpecFile']
            jobSpecId = job['JobSpecId']
            msg += "  %s\n" % jobSpecId
            testInstance['JobSpecs'][jobSpecId] = jobSpecFile



        logging.info(msg)



        return
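
A hedged sketch of the testInstance mapping this method expects; every key shown is one the method reads, but the values are invented, and 'mgr' stands for an assumed RelValSpecMgr instance whose jobCounts already knows this test name.

testInstance = {
    'Name': 'ExampleRelValTest',
    'Site': 'T2_EXAMPLE_Site',
    'WorkflowSpecId': 'ExampleRelValTest-Workflow',
    'WorkflowSpecFile': '/path/to/Example-Workflow.xml',
    'InputDataset': None,          # None selects the RequestJobFactory branch
    'WorkingDir': '/tmp/relval-working-dir',
    'TotalEvents': 1000,
    'EventsPerJob': 100,
    'JobSpecs': {},
}
mgr.makeJobs(testInstance)
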
Code Example #6
File: InjectTestLCG.py    Project: giffels/PRODAGENT
def GoodWorkflow(workflow):
   """
   Check if workflow can be loaded
   """
   RequestDir,firstrun = getRequestInjectorConfig()
   workflowCache="%s/WorkflowCache"%RequestDir
   workflowSpec = WorkflowSpec()
   try:
      workflowSpec.load(workflow)
   except:
      return False
   return True
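
A small hedged example of filtering a list of candidate workflow files with the check above; the paths are hypothetical, and a working RequestInjector configuration is assumed since getRequestInjectorConfig() is called before the load.

candidates = ["/path/to/Good-Workflow.xml", "/path/to/Broken-Workflow.xml"]
loadable = [wf for wf in candidates if GoodWorkflow(wf)]
print "Loadable workflows:", loadable
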
Code Example #7
class FactoryInterface:
    """
    _FactoryInterface_

    JobSpec Factory Interface definition & common utils for
    all job spec factory generators

    """
    def __init__(self, workflowSpec):
        # or use isinstance(WorkflowSpec) if need to include sub classes
        if workflowSpec.__class__ is WorkflowSpec:
            self.workflow = workflowSpec
        else:
            self.workflow = WorkflowSpec()
            self.workflow.load(workflowSpec)
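
As the constructor above shows, a factory can be handed either an already loaded WorkflowSpec or a path to a spec file; a short sketch of both forms, with a hypothetical file path:

from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

# pass a spec file path: the factory loads it itself
factory = FactoryInterface("/path/to/Example-Workflow.xml")

# or pass a WorkflowSpec instance that is already loaded
spec = WorkflowSpec()
spec.load("/path/to/Example-Workflow.xml")
factory = FactoryInterface(spec)
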
Code Example #8
File: PluginInterface.py    Project: giffels/PRODAGENT
    def loadWorkflow(self, specFile):
        """
        _loadWorkflow_

        Helper method, since every plugin will have to do
        something with a workflow

        """
        spec = WorkflowSpec()
        try:
            spec.load(specFile)
        except Exception, ex:
            msg = "Unable to read workflow spec file:\n%s\n" % specFile
            msg += str(ex)
            raise RuntimeError, msg
Code Example #9
def createJobSpec(jobSpecId,
                  workflowSpecFile,
                  filename,
                  runNumber,
                  eventCount,
                  firstEvent=None,
                  saveString=False,
                  loadString=True):

    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)

    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
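
A hedged call of the helper above with made-up identifiers and paths; loadString=False selects the on-disk workflow file branch, while the default (True) expects the workflow XML to be passed in as a string.

createJobSpec("ExampleWorkflow-Run1000",        # jobSpecId
              "/path/to/Example-Workflow.xml",  # workflowSpecFile on disk
              "/path/to/Example-JobSpec.xml",   # where to save the JobSpec
              1000,                             # runNumber
              500,                              # eventCount
              firstEvent=1,
              saveString=False,
              loadString=False)
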
Code Example #10
File: EventJobSpec.py    Project: PerilousApricot/CRAB2
def createJobSpec(jobSpecId,workflowSpecFile, filename, runNumber, eventCount,  firstEvent = None,saveString=False,loadString=True):

    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (
        workflowSpec.workflowName(),
        runNumber
            )


    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:    
       return jobSpec.saveString()
    jobSpec.save(filename)
    return
Code Example #11
File: RelValPlugin.py    Project: giffels/PRODAGENT
    def __call__(self, collectPayload):
        """
        _operator(collectPayload)_

        Given the dataset in the payload, callout to DBS
        to find the files to be harvested

        """
        msg = "RelValPlugin invoked for %s" % str(collectPayload)
        logging.info(msg)

        if collectPayload.get('Scenario', None) is None:
            msg = "RelValPlugin: Payload should provide a scenario."
            raise RuntimeError, msg    

        site = self.args.get("Site", "srm-cms.cern.ch")

        baseCache = os.path.join(self.args['ComponentDir'],
                                 "RelValPlugin")
        if not os.path.exists(baseCache):
            os.makedirs(baseCache)

        datasetCache = os.path.join(baseCache,
                                    collectPayload['PrimaryDataset'],
                                    collectPayload['ProcessedDataset'],
                                    collectPayload['DataTier'])

        if not os.path.exists(datasetCache):
            os.makedirs(datasetCache)

        workflowFile = os.path.join(
            datasetCache,
            "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
            )

        if not os.path.exists(workflowFile):
            msg = "No workflow found for dataset: %s\n " % (
                collectPayload.datasetPath())
            msg += "Looking up software version and generating workflow..."
            logging.info(msg)

            # Override Global Tag?
            if self.args.get("OverrideGlobalTag", None) is not None:
                globalTag = self.args['OverrideGlobalTag']
                msg = "Using Overrride for Global: %s" % globalTag
                logging.info(msg)
            # Global Tag provided in the payload?
            elif collectPayload.get('GlobalTag', None) is not None:
                globalTag = collectPayload['GlobalTag']
                msg = "Global tag found in payload: %s" % globalTag
                logging.info(msg)
            # Look up in DBS for Global Tag, use fallback GT as last resort
            else:
                globalTag = findGlobalTagForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'])

            # Override CMSSW Version
            if self.args.get("OverrideCMSSW", None) is not None:
                cmsswVersion = self.args['OverrideCMSSW']
                msg = "Using Override for CMSSW Version %s" % (
                    self.args['OverrideCMSSW'],)
                logging.info(msg)
            # CMSSW Version provided in the payload?
            elif collectPayload.get('CMSSWVersion', None) is not None:
                cmsswVersion = collectPayload['CMSSWVersion']
                msg = "CMSSW Version found in payload: %s" % cmsswVersion
                logging.info(msg)
            else:
                cmsswVersion = findVersionForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
                msg = "CMSSW Version for dataset/run\n"
                msg += " Dataset %s\n" % collectPayload.datasetPath()
                msg += " CMSSW Version = %s\n " % cmsswVersion
                logging.info(msg)

            workflowSpec = createHarvestingWorkflow(
                collectPayload.datasetPath(),
                site,
                self.args['CmsPath'],
                self.args['ScramArch'],
                cmsswVersion,
                globalTag,
                configFile=self.args['ConfigFile'],
                DQMServer=self.args['DQMServer'],
                proxyLocation=self.args['proxyLocation'],
                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                doStageOut=self.args['DoStageOut'])
            
            workflowSpec.save(workflowFile)
            msg = "Created Harvesting Workflow:\n %s" % workflowFile
            msg += "\nThe following parameters were used:\n"
            msg += "DQMserver     ==> %s\n" % (self.args['DQMServer'])
            msg += "proxyLocation ==> %s\n" % (self.args['proxyLocation'])
            msg += "Stage Out     ==> %s\n" % (self.args['DoStageOut'])
            msg += "DQMCopyToCERN ==> %s\n" % (self.args['DQMCopyToCERN'])
            logging.info(msg)
            self.publishWorkflow(workflowFile, workflowSpec.workflowName())
        else:
            msg = "Loading existing workflow for dataset: %s\n " % (
                collectPayload.datasetPath())
            msg += " => %s\n" % workflowFile
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowFile)

        job = {}
        jobSpec = workflowSpec.createJobSpec()
        jobName = "%s-%s-%s" % (
            workflowSpec.workflowName(),
            collectPayload['RunNumber'],
            time.strftime("%H-%M-%S-%d-%m-%y")
            )

        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Harvesting")

        # Adding specific parameters to the JobSpec
        jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']  # How should we manage the run numbers?
        jobSpec.parameters['Scenario'] = collectPayload['Scenario']
        if collectPayload.get('RefHistKey', None) is not None:
            jobSpec.parameters['RefHistKey'] = collectPayload['RefHistKey']

        jobSpec.addWhitelistSite(site)
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.cfgInterface.inputFiles.extend(
            getLFNForDataset(self.dbsUrl,
                             collectPayload['PrimaryDataset'],
                             collectPayload['ProcessedDataset'],
                             collectPayload['DataTier'],
                             run=collectPayload['RunNumber']))

        specCacheDir =  os.path.join(
            datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)

        jobSpec.save(jobSpecFile)

        job["JobSpecId"] = jobName
        job["JobSpecFile"] = jobSpecFile
        job['JobType'] = "Harvesting"
        job["WorkflowSpecId"] = workflowSpec.workflowName(),
        job["WorkflowPriority"] = 10
        job["Sites"] = [site]
        job["Run"] = collectPayload['RunNumber']
        job['WorkflowSpecFile'] = workflowFile

        msg = "Harvesting Job Created for\n"
        msg += " => Run:       %s\n" % collectPayload['RunNumber']
        msg += " => Primary:   %s\n" % collectPayload['PrimaryDataset']
        msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
        msg += " => Tier:      %s\n" % collectPayload['DataTier']
        msg += " => Workflow:  %s\n" % job['WorkflowSpecId']
        msg += " => Job:       %s\n" % job['JobSpecId']
        msg += " => Site:      %s\n" % job['Sites']
        logging.info(msg)

        return [job]
Code Example #12
import StageOut.Utilities as StageOutUtils
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

if __name__ == '__main__':
    msg = "******RuntimeStageOutFailure Invoked*****"

    state = TaskState(os.getcwd())
    state.loadRunResDB()
    config = state.configurationDict()

    #  //
    # // find inputs by locating the task for which we are staging out
    #//  and loading its TaskState

    workflow = WorkflowSpec()
    workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])
    stageOutFor, override, controls = StageOutUtils.getStageOutConfig(
        workflow, state.taskName())

    inputTasks = stageOutFor
    for inputTask in inputTasks:

        inputState = getTaskState(inputTask)
        if inputState == None:
            msg = "Input State: %s Not found, skipping..." % inputTask
            continue

        inputReport = inputState.getJobReport()

        inputReport.status = "Failed"
        if inputReport.exitCode in (0, "0"):
Code Example #13
File: RequestIterator.py    Project: giffels/PRODAGENT
class RequestIterator:
    """
    _RequestIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.count = 0
        self.runIncrement = 1
        self.currentJob = None
        self.sitePref = None
        self.pileupDatasets = {}
        self.ownedJobSpecs = {}
        
        #  //
        # // Initially hard coded, should be extracted from Component Config
        #//
        self.eventsPerJob = 10 
        
        self.workflowSpec = WorkflowSpec()
        try:
            self.workflowSpec.load(workflowSpecFile)
        except:
            logging.error("ERROR Loading Workflow: %s " % (workflowSpecFile))
            return

        if self.workflowSpec.parameters.get("RunIncrement", None) != None:
            self.runIncrement = int(
                self.workflowSpec.parameters['RunIncrement']
                )

    
        self.generators = GeneratorMaker()
        self.workflowSpec.payload.operate(self.generators)
        
        
        
        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" %self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
        


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list
        
        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites


    def __call__(self):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.

        """
        newJobSpec = self.createJobSpec()
        self.count += self.runIncrement
        return newJobSpec


    def createJobSpec(self):
        """
        _createJobSpec_

        Load the WorkflowSpec object and generate a JobSpec from it

        """
        
        jobSpec = self.workflowSpec.createJobSpec()
        jobName = "%s-%s" % (
            self.workflowSpec.workflowName(),
            self.count,
            )
        self.currentJob = jobName
        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Processing")
        jobSpec.parameters['RunNumber'] = self.count

        
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.operate(self.generateJobConfig)
        jobSpec.payload.operate(self.generateCmsGenConfig)
        specCacheDir =  os.path.join(
            self.specCache, str(self.count // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)
        self.ownedJobSpecs[jobName] = jobSpecFile

        
        #  //
        # // Add site pref if set
        #//
        if self.sitePref != None:
#
#AF:  Allow site pref to be a comma separated list of sites, each one
#     added in the Whitelist:
#            jobSpec.addWhitelistSite(self.sitePref)
          for siteWhite in self.sitePref.split(","): 
            jobSpec.addWhitelistSite(siteWhite)
            
        jobSpec.save(jobSpecFile)        
        return jobSpecFile
        
        
    def generateJobConfig(self, jobSpecNode):
        """
        _generateJobConfig_
        
        Operator to act on a JobSpecNode tree to convert the template
        config file into a JobSpecific Config File
                
        """
        if jobSpecNode.name not in self.generators.keys():
            return
        generator = self.generators[jobSpecNode.name]

        useOutputMaxEv = False
        if jobSpecNode.cfgInterface != None:
            outMaxEv = jobSpecNode.cfgInterface.maxEvents['output']
            if outMaxEv != None:
                useOutputMaxEv = True

        if useOutputMaxEv:
            jobCfg = generator(self.currentJob,
                               maxEventsWritten = self.eventsPerJob,
                               firstRun = self.count)
        else:
            jobCfg = generator(self.currentJob,
                               maxEvents = self.eventsPerJob,
                               firstRun = self.count)
        
        #  //
        # // Is there pileup for this node?
        #//
        if self.pileupDatasets.has_key(jobSpecNode.name):
            puDataset = self.pileupDatasets[jobSpecNode.name]
            logging.debug("Node: %s has a pileup dataset: %s" % (
                jobSpecNode.name,  puDataset.dataset,
                ))
            fileList = puDataset.getPileupFiles()
            jobCfg.pileupFiles = fileList

            
            
        
        jobSpecNode.cfgInterface = jobCfg
        return


    def generateCmsGenConfig(self, jobSpecNode):
        """
        _generateCmsGenConfig_

        Process CmsGen type nodes to insert maxEvents and run numbers
        for cmsGen jobs

        """
        if jobSpecNode.type != "CmsGen":
            return

        jobSpecNode.applicationControls['firstRun'] = self.count
        jobSpecNode.applicationControls['maxEvents'] = self.eventsPerJob
        jobSpecNode.applicationControls['randomSeed'] = randomSeed()
        jobSpecNode.applicationControls['fileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        jobSpecNode.applicationControls['logicalFileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        return
        

    def removeSpec(self, jobSpecId):
        """
        _removeSpec_

        Remove a Spec file when it has been successfully injected

        """
        if jobSpecId not in self.ownedJobSpecs.keys():
            return

        logging.info("Removing JobSpec For: %s" % jobSpecId)
        filename = self.ownedJobSpecs[jobSpecId]
        if os.path.exists(filename):
            os.remove(filename)
            del self.ownedJobSpecs[jobSpecId]
        return

        


    def save(self, directory):
        """
        _save_

        Persist this objects state into an XML file and save
        it in the directory provided

        """
        doc = IMProvDoc("RequestIterator")
        node = IMProvNode(self.workflowSpec.workflowName())
        doc.addNode(node)

        node.addNode(IMProvNode("Run", None, Value = str(self.count)))
        node.addNode(
            IMProvNode("EventsPerJob", None, Value = str(self.eventsPerJob))
            )
        node.addNode(IMProvNode("SitePref", None, Value = str(self.sitePref)))

        pu = IMProvNode("Pileup")
        node.addNode(pu)
        for key, value in self.pileupDatasets.items():
            puNode = value.save()
            puNode.attrs['PayloadNode'] = key
            pu.addNode(puNode)

        specs = IMProvNode("JobSpecs")
        node.addNode(specs)
        for key, val in self.ownedJobSpecs.items():
            specs.addNode(IMProvNode("JobSpec", val, ID = key))
            
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )
        handle = open(fname, 'w')
        handle.write(doc.makeDOMDocument().toprettyxml())
        handle.close()
        return


    def load(self, directory):
        """
        _load_

        Load this instance given the workflow and directory containing
        the persistency file

        """
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )

        try:
            node = loadIMProvFile(fname)
        except Exception, ex:
            msg = "ERROR: Corrupted Persistency File:\n"
            msg += "  => %s\n" % fname
            msg += "Cannot be read:\n  => %s\n" % str(ex)
            logging.error(msg)
            return
        

        qbase = "/RequestIterator/%s" % self.workflowSpec.workflowName()

        runQ = IMProvQuery("%s/Run[attribute(\"Value\")]" % qbase)
        eventQ = IMProvQuery("%s/EventsPerJob[attribute(\"Value\")]" % qbase)
        siteQ = IMProvQuery("%s/SitePref[attribute(\"Value\")]" % qbase)

        runVal = int(runQ(node)[-1])
        eventVal = int(eventQ(node)[-1])
        siteVal = str(siteQ(node)[-1])
        if siteVal.lower() == "none":
            siteVal = None

        self.count = runVal
        self.eventsPerJob = eventVal
        self.sitePref = siteVal
        
        puQ = IMProvQuery("%s/Pileup/*" % qbase)
        puNodes = puQ(node)
        for puNode in puNodes:
            payloadNode = str(puNode.attrs.get("PayloadNode"))
            puDataset = PileupDataset("dummy", 1)
            puDataset.load(puNode)
            self.pileupDatasets[payloadNode] = puDataset

        specQ = IMProvQuery("%s/JobSpecs/*" % qbase)
        specNodes = specQ(node)
        for specNode in specNodes:
            specId = str(specNode.attrs['ID'])
            specFile = str(specNode.chardata).strip()
            self.ownedJobSpecs[specId] = specFile

        return
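
A hedged driver for the iterator above; the workflow file and working directory are hypothetical, sitePref and eventsPerJob are attributes the class itself exposes, and calling the instance yields the path of a freshly written JobSpec file.

iterator = RequestIterator("/path/to/Example-Workflow.xml", "/tmp/request-working-dir")
iterator.eventsPerJob = 250
iterator.sitePref = "T2_EXAMPLE_SiteA,T2_EXAMPLE_SiteB"   # comma-separated whitelist

for _ in range(3):
    jobSpecFile = iterator()        # generate one concrete JobSpec
    print "Created", jobSpecFile

iterator.save("/tmp/request-working-dir")   # persist counters for a later load()
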
Code Example #14
File: fixDB.py    Project: giffels/PRODAGENT
logHandler.setFormatter(logFormatter)
logging.getLogger().addHandler(logHandler)
logging.getLogger().setLevel(logging.INFO)
Dataset.setLogging(logging)

database = MergeSensorDB()
Dataset.setDatabase(database)

workflowFile = sys.argv[1]

print "Updating DB for workflow: ", workflowFile

# read the WorkflowSpecFile
try:
    wfile = WorkflowSpec()
    wfile.load(workflowFile)

# wrong dataset file
except Exception, msg:
    print "Error loading workflow specifications from %s: %s" \
          % (workflowFile, msg)
    sys.exit(1)

# get output modules
try:
    outputDatasetsList = wfile.outputDatasets()

    outputModules = [outDS['OutputModuleName'] \
                     for outDS in outputDatasetsList]

    # remove duplicates
Code Example #15
File: DatasetIterator.py    Project: giffels/PRODAGENT
class DatasetIterator:
    """
    _DatasetIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.currentJob = None
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpecFile)
        self.currentJobDef = None
        self.count = 0
        self.onlyClosedBlocks = False
        if  self.workflowSpec.parameters.has_key("OnlyClosedBlocks"):
            onlyClosed =  str(
                self.workflowSpec.parameters["OnlyClosedBlocks"]).lower()
            if onlyClosed == "true":
                self.onlyClosedBlocks = True
        self.ownedJobSpecs = {}
        self.allowedBlocks = []
        self.allowedSites = []
        self.dbsUrl = getLocalDBSURL()
        self.splitType = \
                self.workflowSpec.parameters.get("SplitType", "file").lower()
        self.splitSize = int(self.workflowSpec.parameters.get("SplitSize", 1))

        self.generators = GeneratorMaker()
        self.generators(self.workflowSpec.payload)

        self.pileupDatasets = {}
        #  //
        # // Does the workflow contain a block restriction??
        #//
        blockRestriction = \
             self.workflowSpec.parameters.get("OnlyBlocks", None)
        if blockRestriction != None:
            #  //
            # // restriction on blocks present, populate allowedBlocks list
            #//
            msg = "Block restriction provided in Workflow Spec:\n"
            msg += "%s\n" % blockRestriction
            blockList = blockRestriction.split(",")
            for block in blockList:
                if len(block.strip() ) > 0:
                    self.allowedBlocks.append(block.strip())

        #  //
        # // Does the workflow contain a site restriction??
        #//
        siteRestriction = \
           self.workflowSpec.parameters.get("OnlySites", None)          
        if siteRestriction != None:
            #  //
            # // restriction on sites present, populate allowedSites list
            #//
            msg = "Site restriction provided in Workflow Spec:\n"
            msg += "%s\n" % siteRestriction
            siteList = siteRestriction.split(",")
            for site in siteList:
                if len(site.strip() ) > 0:
                    self.allowedSites.append(site.strip())

        #  //
        # // Is the DBSURL contact information provided??
        #//

        value = self.workflowSpec.parameters.get("DBSURL", None)
        if value != None:
            self.dbsUrl = value

        if self.dbsUrl == None:
            msg = "Error: No DBSURL available for dataset:\n"
            msg += "Cant get local DBSURL and one not provided with workflow"
            raise RuntimeError, msg
            
        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" %self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
        
        
    def __call__(self, jobDef):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.
        The JobDef should be a JobDefinition with the input details
        including LFNs and event ranges etc.

        """
        newJobSpec = self.createJobSpec(jobDef)
        self.count += 1
        return newJobSpec


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_
        
        Are we dealing with pileup? If so pull in the file list
        
        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_

        Are we dealing with pileup? If so pull in the site list

        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites

    def inputDataset(self):
        """
        _inputDataset_

        Extract the input Dataset from this workflow

        """
        topNode = self.workflowSpec.payload
        try:
            inputDataset = topNode._InputDatasets[-1]
        except StandardError, ex:
            msg = "Error extracting input dataset from Workflow:\n"
            msg += str(ex)
            logging.error(msg)
            return None

        return inputDataset.name()
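
A hedged sketch of driving the iterator above; the paths are invented, a reachable local DBS is assumed (the constructor calls getLocalDBSURL()), and the jobDef objects would normally come from ProdAgent's dataset-splitting machinery, so the list is left empty here.

iterator = DatasetIterator("/path/to/Example-Workflow.xml", "/tmp/dataset-working-dir")
iterator.loadPileupDatasets()
print "Input dataset:", iterator.inputDataset()

jobDefs = []   # normally JobDefinition objects carrying LFNs and event ranges
for jobDef in jobDefs:
    print "Created", iterator(jobDef)
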
Code Example #16
File: RuntimeStageOut.py    Project: giffels/PRODAGENT
def stageOut():
    """
    _stageOut_

    Main function for this module. Loads data from the task
    and manages the stage out process for a single attempt

    """
    state = TaskState(os.getcwd())
    state.loadRunResDB()

    workflow = WorkflowSpec()
    workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])

    jobSpecFile = os.environ.get('PRODAGENT_JOBSPEC')
    jobSpecId = None
    if jobSpecFile is not None:
        jobSpec = JobSpec()
        jobSpec.load(jobSpecFile)
        jobSpecId = jobSpec.parameters.get('JobName')


    print workflow
    print state.taskName()
    print jobSpecId

    stageOutFor, override, controls = StageOutUtils.getStageOutConfig(
        workflow, state.taskName())

    toplevelReport = os.path.join(os.environ['PRODAGENT_JOB_DIR'],
                                  "FrameworkJobReport.xml")


    exitCode = 0
    #  //
    # // find inputs by locating the task for which we are staging out
    #//  and loading its TaskState
    for inputTask in stageOutFor:
        print "Attempting to stage out files for node %s" % inputTask
        try:
            inputState = getTaskState(inputTask)
            msg = "Loaded Input Task: %s " % inputTask
        except Exception, ex:
            msg = "Error load for TaskState for task %s" % inputTask
            msg += "%s\n" % str(ex)
            inputState = None
        print msg

        if inputState == None:
            # exit with init error
            # generate failure report in this dir, since we can't find
            # the input state dir
            inputReport = FwkJobReport()
            inputReport.name = inputTask
            inputReport.jobSpecId = jobSpecId
            exitCode = 60311
            errRep = inputReport.addError(
                60311, "TaskStateError")
            errRep['Description'] = msg
            inputReport.status = "Failed"
            inputReport.exitCode = 60311
            updateReport(toplevelReport, inputReport)
            print "TaskState is None, exiting..."
            return exitCode

        try:
            inputReport = inputState.getJobReport()
            msg = "Loaded JobReport for Task : %s\n" % inputTask
            msg += "File: %s\n" % inputState.jobReport
        except Exception, ex:
            msg = "Error loading input report : %s" % str(ex)
            inputReport = None
Code Example #17
import os
import pickle
from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWAPILoader import CMSSWAPILoader
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.MCPayloads.DatasetTools import getOutputDatasetsWithPSet


specfile = "/uscms/home/gutsche/CSA08-JetET110-CSA08_S43_S43_rereco_may19_PIC_v1-Workflow.xml"

rawCfgFile = "%s.raw.cfg" % os.path.basename(specfile)
origCfgFile = "%s.orig.cfg" % os.path.basename(specfile)
dbsCfgFile = "%s.dbs.cfg" % os.path.basename(specfile)

spec = WorkflowSpec()
spec.load(specfile)


rawCfg = spec.payload.cfgInterface.rawCfg
originalCfg = spec.payload.cfgInterface.originalCfg

dbsDatasets = getOutputDatasetsWithPSet(spec.payload)

handle = open(dbsCfgFile, 'w')
handle.write(
    dbsDatasets[0]['PSetContent']
    )
handle.close()

handle = open(origCfgFile, 'w')
handle.write(originalCfg)
Code Example #18
File: ResultsStatus.py    Project: giffels/PRODAGENT
class ResultsStatus:
    """
    _ResultsStatus_

    Object to retrieve and compute the overall state of a Results
    Workflow

    """
    def __init__(self, config, msgSvcRef,  **workflowDetails):
        self.configuration = config
        self.msgSvcRef = msgSvcRef
        self.workflowDetails = workflowDetails
        self.workflow = workflowDetails['id']
        self.workflowFile = workflowDetails['workflow_spec_file']
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(self.workflowFile)

        self.doMigration = self.configuration.get("MigrateToGlobal", True)
        self.doInjection = self.configuration.get("InjectToPhEDEx", True)

    def __call__(self):
        """
        _operator()_

        Evaluate the status of this workflow from the WorkflowEntities
        data and publish any events that are triggered

        """

        if self.processingComplete():
            logging.info("Processing Complete for %s" % self.workflow)
            for dataset in self.unmergedDatasets():
                if self.doMigration:
                    logging.debug(
                        "Publishing MigrateToGlobal for %s" % dataset)
                    self.msgSvcRef.publish(
                        "DBSInterface:MigrateDatasetToGlobal",
                        dataset)
                    self.msgSvcRef.commit()
                if self.doInjection:
                    logging.debug("Publishing PollMigration for %s" % dataset)
                    self.msgSvcRef.publish("StoreResultsAccountant:PollMigration",
                                           self.workflowFile, "00:02:00")
                    self.msgSvcRef.commit()

            Session.commit_all()


            WEWorkflow.setFinished(self.workflow)
            WEWorkflow.remove(self.workflow)
            Session.commit_all()

        return


    def processingComplete(self):
        """
        _processingComplete_

        look at the processing jobs for the workflow, and return True
        if all processing jobs are complete

        """
        intermediateDBS = self.workflowSpec.parameters['DBSURL']
        outputDataset   = self.workflowSpec.outputDatasets()[0].name()

        allJobs      = WEUtils.jobsForWorkflow(self.workflow, "Merge")
        finishedJobs = WEUtils.jobsForWorkflow(self.workflow, "Merge", "finished")
        totalProcessing = len(allJobs)
        totalComplete   = len(finishedJobs)

        logging.info("%s: %s/%s jobs complete" %
                      (self.workflow,totalComplete,totalProcessing))

        if totalProcessing == 0: # Protection for non-sensical situation
            return False

        if totalComplete < totalProcessing:
            return False

        # Check to make sure local DBS knows about all output files
        try:
            reader = DBSReader(intermediateDBS)
            blockList = reader.getFiles(dataset = outputDataset)
        except:
            logging.info("Dataset not in DBS yet")
            return False

        totalRegistered = 0
        for block in blockList:
            totalRegistered += len(blockList[block]['Files'])

        logging.info("%s: %s/%s jobs registered" %
                      (self.workflow,totalRegistered,totalProcessing))
        if totalRegistered < totalProcessing:
            return False

        return True


    def unmergedDatasets(self):
        """
        _unmergedDatasets_

        Retrieve a list of datasets that can be ForceMerge'd
        """
        extractor = ExtractDatasets()
        self.workflowSpec.payload.operate(extractor)
        result = [ x.name() for x in extractor.datasets ]
        return result
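
A heavily hedged sketch of exercising ResultsStatus by hand. The message-service stub only mimics the publish()/commit() calls used above, the configuration is a plain mapping since only .get() is used on it, and the keyword keys ('id', 'workflow_spec_file') are exactly the ones the constructor reads. A real workflow spec file and a live ProdAgent database are still required for the load and the WorkflowEntities queries.

class MessageServiceStub:
    def publish(self, event, payload, delay=None):
        print "publish:", event, payload
    def commit(self):
        pass

status = ResultsStatus({"MigrateToGlobal": False, "InjectToPhEDEx": False},
                       MessageServiceStub(),
                       id="ExampleWorkflow",
                       workflow_spec_file="/path/to/Example-Workflow.xml")
status()
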
Code Example #19
    if opt == "--prod-events":
        totalEvents = int(arg)


if workflowFile == None:
    msg = "--prod-workflow not set"
    raise RuntimeError, msg
if workingDir == None:
    msg = "--working-dir not set"
    raise RuntimeError, msg

#  //
# // Script wide objects
#//
workflowSpec = WorkflowSpec()
workflowSpec.load(workflowFile)
productionDir = "%s/production" % workingDir
mergeProdDir = "%s/production-merge" % workingDir

if not os.path.exists(productionDir):
    os.makedirs(productionDir)
if not os.path.exists(mergeProdDir):
    os.makedirs(mergeProdDir)

mergeProdSpecs = createMergeJobWorkflow(workflowSpec)
prodFactory = RequestJobFactory(workflowSpec, productionDir, productionEvents)

for mergeDS, mergeSpec in mergeProdSpecs.items():
    mrgSpecFile = "%s/%s.xml" % (mergeProdDir, mergeDS.replace("/", "_"))
    mergeSpec.save(mrgSpecFile)
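
A hedged continuation of the script above: RequestJobFactory instances are callable (see the makeJobs method in Code Example #5), so producing the actual production job specs is assumed to look like this.

jobs = prodFactory()
print "Created %s production job specs under %s" % (len(jobs), productionDir)
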
Code Example #20
File: cfgFromWorkflow.py    Project: giffels/PRODAGENT
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWConfig import CMSSWConfig




if workflow == None:
    msg = "Error: --workflow option is required"
    raise RuntimeError, msg

if not os.path.exists(workflow):
    msg = "Cannot find workflow file:\n %s" % workflow
    raise RuntimeError, msg

spec = WorkflowSpec()
spec.load(workflow)





def indexDict(modName, modRef):
    """
    _indexDict_

    Given a PSet like python dictionary, generate an index file from
    it. If it has PSet children, act on them recursively

    """
    result = []
    #  //
Code Example #21
File: RelValStatus.py    Project: giffels/PRODAGENT
class RelValStatus:
    """
    _RelValStatus_

    Object to retrieve and compute the overall state of a RelVal
    Workflow

    """
    def __init__(self, config, msgSvcRef,  **workflowDetails):
        self.configuration = config
        self.msgSvcRef = msgSvcRef
        self.workflowDetails = workflowDetails
        self.workflow = workflowDetails['id']
        self.workflowFile = workflowDetails['workflow_spec_file']
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(self.workflowFile)
        
        self.doMigration = self.configuration.get("MigrateToGlobal", False)
        self.doInjection = self.configuration.get("InjectToPhEDEx", False)

        
    def __call__(self):
        """
        _operator()_

        Evaluate the status of this workflow from the WorkflowEntities
        data and publish any events that are triggered

        """
        processed = False
        merged = False
        
        if self.processingComplete():
            logging.info("Processing Complete for %s" % self.workflow)
            processed = True
            #  //
            # //  publish ForceMerge for datasets
            #//
            for dataset in self.unmergedDatasets():
                if countOutstandingUnmergedFiles(dataset) > 0:
                    logging.debug("Publishing ForceMerge for %s" % dataset)
                    self.msgSvcRef.publish("ForceMerge", dataset)
            self.msgSvcRef.commit()
            Session.commit_all()

        if self.mergingComplete():
            logging.info("Merging Complete for %s" % self.workflow)
            merged = True
            #  //
            # // Close Blocks and migrate to global
            #//  Inject them into PhEDEx
            for dataset in self.mergedDatasets():
                if self.doMigration:
                    logging.debug(
                        "Publishing MigrateToGlobal for %s" % dataset)
                    self.msgSvcRef.publish(
                        "DBSInterface:MigrateDatasetToGlobal",
                        dataset)
                    self.msgSvcRef.commit()
                if self.doInjection:
                    logging.debug(
                        "Publishing PhEDExInjectDataset for %s" % dataset)
                    self.msgSvcRef.publish("PhEDExInjectDataset",
                                           dataset)
                    self.msgSvcRef.commit()
                
            Session.commit_all()
                

        if processed and merged:
            #  //
            # // All done: close the workflow out 
            #//
            logging.info("Workflow %s complete" % self.workflow)
            WEWorkflow.setFinished(self.workflow)
            WEWorkflow.remove(self.workflow)
            Session.commit_all()

            #  //
            # // Generate summary
            #//
            self.summariseWorkflow()
            
        return
    

    def summariseWorkflow(self):
        """
        _summariseWorkflow_

        Workflow has been finished, do whatever is required
        to generate a summary for the jobs and dispatch it
        to wherever it is needed
        
        """
        logging.info("Summarising Workflow %s" % self.workflow)
        #  //
        # // Need input from Data Ops here:
        #//
        #  // Can gather information from:
        # //    - Job Reports in Cache
        #//     - ProdMon tables
        #  //   - WE Tables
        # // Generate summary HTML?
        #//  Publish to web somewhere?
        pass
 

    def processingComplete(self):
        """
        _processingComplete_

        look at the processing jobs for the workflow, and return True
        if all processing jobs are complete

        """
        allJobs = WEUtils.jobsForWorkflow(self.workflow, "Processing")
        finishedJobs = WEUtils.jobsForWorkflow(self.workflow, "Processing", "finished")

        totalProcessing = len(allJobs)
        totalProcComplete = len(finishedJobs)

        if totalProcComplete < totalProcessing:
            return False

        return True
        

    def unmergedDatasets(self):
        """
        _unmergedDatasets_

        Retrieve a list of datasets that can be ForceMerge'd
        """
        extractor = ExtractDatasets()
        self.workflowSpec.payload.operate(extractor)
        result = [ x.name() for x in extractor.datasets ]
        return result

    
    def mergingComplete(self):
        """
        _mergingComplete_

        look at the jobs for the merge jobs for the workflow and
        return True if all merge jobs are complete
        
        """
        allMerges = WEUtils.jobsForWorkflow(self.workflow, "Merge")
        finishedMerges = WEUtils.jobsForWorkflow(self.workflow, "Merge", "finished")
        totalMerging = len(allMerges)
        totalMergeComplete = len(finishedMerges)
        
        if totalMerging == 0:
            # no merges in the system => no merge complete
            return False

        if totalMergeComplete < totalMerging:
            return False
        return True

    def mergedDatasets(self):
        """
        _mergedDatasets_

        Get a list of merged datasets from the workflow that can
        be used to close blocks, migrate and inject

        """
        mergeWorkflow = createMergeDatasetWorkflow(self.workflowSpec)
        extractor = ExtractDatasets()
        mergeWorkflow.payload.operate(extractor)
        result = [ x.name() for x in extractor.datasets ]
        return result
Code Example #22
File: DBSPlugin.py    Project: giffels/PRODAGENT
    def __call__(self, collectPayload):
        """
        _operator(collectPayload)_

        Given the dataset and run in the payload, callout to DBS
        to find the files to be harvested

        """
        msg = "DBSPlugin invoked for %s" % str(collectPayload)
        logging.info(msg)


        site = self.args.get("Site", "srm.cern.ch")

        baseCache = os.path.join(self.args['ComponentDir'],
                                 "DBSPlugin")
        if not os.path.exists(baseCache):
            os.makedirs(baseCache)

        datasetCache = os.path.join(baseCache,
                                    collectPayload['PrimaryDataset'],
                                    collectPayload['ProcessedDataset'],
                                    collectPayload['DataTier'])

        if not os.path.exists(datasetCache):
            os.makedirs(datasetCache)

        workflowFile = os.path.join(
            datasetCache,
            "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
            )
        if not os.path.exists(workflowFile):
            msg = "No workflow found for dataset: %s\n " % (
                collectPayload.datasetPath(),)
            msg += "Looking up software version and generating workflow..."

            if self.args.get("OverrideGlobalTag", None) == None:
                globalTag = findGlobalTagForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
            else:
                globalTag = self.args['OverrideGlobalTag']


            if self.args.get("OverrideCMSSW", None) != None:
                cmsswVersion = self.args['OverrideCMSSW']
                msg = "Using Override for CMSSW Version %s" % (
                    self.args['OverrideCMSSW'],)
                logging.info(msg)
            else:
                cmsswVersion = findVersionForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
                msg = "Found CMSSW Version for dataset/run\n"
                msg += " Dataset %s Run %s\n" % (collectPayload.datasetPath(),
                                                 collectPayload['RunNumber'])
                msg += " CMSSW Version = %s\n " % cmsswVersion
                logging.info(msg)

            workflowSpec = createHarvestingWorkflow(
                collectPayload.datasetPath(),
                site,
                self.args['CmsPath'],
                self.args['ScramArch'],
                cmsswVersion,
                globalTag,
                configFile=self.args['ConfigFile'],
                DQMServer=self.args['DQMServer'],
                proxyLocation=self.args['proxyLocation'],
                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                doStageOut=self.args['DoStageOut'])

            workflowSpec.save(workflowFile)
            msg = "Created Harvesting Workflow:\n %s" % workflowFile
            logging.info(msg)
            self.publishWorkflow(workflowFile, workflowSpec.workflowName())
        else:
            msg = "Loading existing workflow for dataset: %s\n " % (
                collectPayload.datasetPath(),)
            msg += " => %s\n" % workflowFile
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowFile)




        job = {}
        jobSpec = workflowSpec.createJobSpec()
        jobName = "%s-%s-%s" % (
            workflowSpec.workflowName(),
            collectPayload['RunNumber'],
            time.strftime("%H-%M-%S-%d-%m-%y")
            )

        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Harvesting")
        jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']
        jobSpec.addWhitelistSite(site)
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))



        jobSpec.payload.cfgInterface.inputFiles.extend(
            listFilesInRun(
            DBSReader(self.dbsUrl),
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'],
            collectPayload['RunNumber'])
            )

        specCacheDir =  os.path.join(
            datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)

        jobSpec.save(jobSpecFile)


        job["JobSpecId"] = jobName
        job["JobSpecFile"] = jobSpecFile
        job['JobType'] = "Harvesting"
        job["WorkflowSpecId"] = workflowSpec.workflowName(),
        job["WorkflowPriority"] = 10
        job["Sites"] = [site]
        job["Run"] = collectPayload['RunNumber']
        job['WorkflowSpecFile'] = workflowFile



        msg = "Harvesting Job Created for\n"
        msg += " => Run:       %s\n" % collectPayload['RunNumber']
        msg += " => Primary:   %s\n" % collectPayload['PrimaryDataset']
        msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
        msg += " => Tier:      %s\n" % collectPayload['DataTier']
        msg += " => Workflow:  %s\n" % job['WorkflowSpecId']
        msg += " => Job:       %s\n" % job['JobSpecId']
        msg += " => Site:      %s\n" % job['Sites']
        logging.info(msg)

        return [job]
Code Example #23
class RepackerSetup:
    """
    _RepackerSetup_

    Object to manipulate the Configuration files for a repacker job

    - Extract the details of the repacker job entity stored in the
      config

    - Pull in the lumi server information and add it to the config

    """
    def __init__(self, workflowSpec, jobSpec):

        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpec)

        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpec)

        taskState = TaskState(os.getcwd())
        taskState.loadRunResDB()

        jobSpecFinder = NodeFinder(taskState.taskName())
        self.jobSpec.payload.operate(jobSpecFinder)
        self.jobSpecNode = jobSpecFinder.result

        workflowFinder = NodeFinder(taskState.taskName())
        self.workflowSpec.payload.operate(workflowFinder)
        self.workflowNode = workflowFinder.result

        self.run = None
        self.lumis = []
        self.streamerFiles = []
        self.activeDatasets = []


    def unpackJobEntity(self):
        """
        _unpackJobEntity_

        Get the StreamerJobEntity from the JobSpec node

        """

        repackJobEntity = self.jobSpecNode.cfgInterface.extensions.get('Streamer', None)
        if repackJobEntity == None:
            msg = "No StreamerJobEntity in JobSpec configuration\n"
            msg += "This is required for repacker jobs\n"
            raise RuntimeError, msg

        # Get run and lumi numbers for this job
        self.run = repackJobEntity.data['runNumber']
        self.lumis = repackJobEntity.data['lumiSections']
        print "Repacker Job Handling Run:%s\n LumiSections: %s\n" % (self.run,self.lumis)

        # Sort streamer input by lumi ID for time ordering
        self.streamerFiles = sortByValue(repackJobEntity.data['streamerFiles'])
        msg = "Streamer Files for this job are:\n"
        for strmr in self.streamerFiles:
            msg += "  %s\n" % strmr
        print msg

        # Get list of active datasets for this job
##        self.activeDatasets = repackJobEntity.data['activeOutputModules']
##        msg = "This Job Will repack datasets:\n"
##        for dataset in self.activeDatasets:
##            msg += "  %s\n" % dataset
##        print msg

        return


    def backupPSet(self,filename,process):
        """
        _backupPSet_
        
        Write a backup copy of the current PSet to disk.
        """
        print "Wrote current configurations as %s" % filename
        handle = open(filename, 'w')
        handle.write("import pickle\n")
        handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(process))
        handle.write("process = pickle.loads(pickledCfg)\n")
        handle.close()

        return


    def importAndBackupProcess(self):
        """
        _importAndBackupProcess_
        
        Try to import the process object for the job, which is contained in
        PSet.py and save a backup copy of it.
        """
        try:
            from PSet import process
        except ImportError, ex:
            msg = "Failed to import PSet module containing cmsRun Config\n"
            msg += str(ex)
            raise RuntimeError, msg

        print "PSet.py imported"

        self.backupPSet("PSetPreRepack.log",process)

        return process
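
backupPSet above writes an importable Python module that embeds the pickled process object in a triple-quoted string and unpickles it on import. A self-contained sketch of that round trip, using a stand-in object instead of a real cmsRun process:

import pickle

process = {"maxEvents": 100}      # stand-in for the real cmsRun process object

handle = open("PSetPreRepack.log", 'w')
handle.write("import pickle\n")
handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(process))
handle.write("process = pickle.loads(pickledCfg)\n")
handle.close()

# Re-loading the backup is just a matter of executing (or importing) the file
namespace = {}
execfile("PSetPreRepack.log", namespace)
assert namespace['process'] == process
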
Code Example #24
0
class JobSpecExpander:

    def __init__(self, jobSpecFile):
        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpecFile)
        self.taskState = TaskState(os.getcwd())
        self.taskState.loadRunResDB()
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])

        self.config = self.taskState.configurationDict()

        finder = NodeFinder(self.taskState.taskName())
        self.jobSpec.payload.operate(finder)
        self.jobSpecNode = finder.result

        wffinder = NodeFinder(self.taskState.taskName())
        self.workflowSpec.payload.operate(wffinder)
        self.workflowNode = wffinder.result

        tier0Merge = self.workflowSpec.parameters.get("Tier0Merge", "False")

        if self.jobSpecNode.jobType != "Merge" or tier0Merge == "True":

            if self.config.has_key('Configuration'):
                #try:
                self.createPSet()
                #except Exception, ex:
                #    msg = "Unable to generate cmsRun Config from JobSpec:\n"
                #    msg += str(ex)
                #    print msg
                #    badfile = open("exit.status", 'w')
                #    badfile.write("10040")
                #    badfile.close()

        else:
            #  //
            # // Merge job
            #//
            self.createMergePSet()

        # do after pset created to get correct input files
        self.setJobDetails()
        if self.config.has_key('UserSandbox'):
            self.userSandbox()

    def handleInputLink(self, config, inpLink):
        """
        _handleInputLink_

        Generate the information for the input link between this
        task and the task specified

        """
        msg = "Input Link Detected:\n"
        for k, v in inpLink.items():
            msg += " %s = %s\n" % (k, v)
        print msg

        inputTask = getTaskState(inpLink['InputNode'])

        if inputTask == None:
            msg = "Unable to create InputLink for task: %s\n" % (
                inpLink['InputNode'],)
            msg += "Input TaskState could not be retrieved..."
            raise RuntimeError, msg

        inputTask.loadJobReport()
        inputReport = inputTask.getJobReport()
        if inputReport == None:
            msg = "Unable to create InputLink for task: %s\n" % (
                inpLink['InputNode'],)
            msg += "Unable to load input job report file"
            raise RuntimeError, msg

        # add files to override catalog
        inputFileList = []
        tfc = None

        for file in inputReport.files:
            if not file['ModuleLabel'] == inpLink['OutputModule']:
                continue
            # link to file via lfn (in tfc) if link isn't standalone and we
            # have a valid lfn. Else refer to file via pfn
            if not inpLink['AppearStandalone'] and \
                            file.get('LFN', None) not in (None, '', 'None'):
                if not tfc:
                    tfc = TrivialFileCatalog.TrivialFileCatalog()
                inputFileList.append(file['LFN'])
                tfc.addLfnToPfnRule('override', file['LFN'], file['PFN'])
            else:
                inputFileList.append("file:%s" % file['PFN'])

        if tfc:
            print "Creating override tfc, contents below"
            print str(tfc)
            tfc.write(os.path.join(os.getcwd(), 'override_catalog.xml'))

        if inpLink['InputSource'] == "source":
            #  //
            # // feed into main source
            #//
            config.inputFiles = inputFileList
            if tfc:
                config.inputOverrideCatalog = os.path.join(os.getcwd(), 'override_catalog.xml')
            msg = "Input Link created to input source for files:\n"
            for f in inputFileList:
                msg += " %s\n" % f

            print msg
            return
        #  //
        # // Need to add to secondary source with name provided
        #//
        raise NotImplementedError, "Secondary source input links are not implemented at present..."


    def localCustomization(self, config, merge = False):
        """
        Apply site specific customizations to the config
        """
        site_config = self.taskState.getSiteConfig()

        self.ioCustomization(config, site_config.io_config, merge)


    def ioCustomization(self, config, custom_config, merge = False):
        """
        Apply site specific io customizations
        """
        # Don't do anything if no customization or job has no input files
        if not custom_config or (merge is False and not config.inputFiles):
            return

        import re
        version = lambda v: tuple(int(x) for x in re.compile(r'(\d+)').findall(v))
        cmssw_version = version(os.environ['CMSSW_VERSION'])

        # Only implemented in CMSSW_2_1_8 and above
        if cmssw_version < (2, 1, 8):
            return

        print "Site specific IO parameters will be used:"

        # cacheSize is a property of InputSource
        cache_size = custom_config.get('cacheSize', None)
        if cache_size:
            # Merge pset creates process on fly so can't use CMSSWConfig object
            if merge:
                from ProdCommon.CMSConfigTools.ConfigAPI.InputSource import InputSource
                inputSource = InputSource(config.source)
                inputSource.setCacheSize(cache_size)
            else:
                config.sourceParams['cacheSize'] = cache_size

        if merge:
            from FWCore.ParameterSet.Modules import Service
            config.add_(Service('AdaptorConfig'))

        for param in custom_config:
            print "  %s %s" % (param, custom_config[param])
            if param == 'cacheSize':
                continue

            if merge:
                import FWCore.ParameterSet.Types as CfgTypes
                adaptor = config.services['AdaptorConfig']
                setattr(adaptor, param,
                        CfgTypes.untracked(CfgTypes.string(str(custom_config[param]))))
            else:
                config.tFileAdaptorConfig[param] = custom_config[param]
        return


    def createPSet(self):
        """
        _createPSet_

        Create the PSet cfg File

        """
        cfgFile = self.config['Configuration'].get("CfgFile", ["PSet.py"])[0]
        cfgFile = str(cfgFile)
        self.jobSpecNode.loadConfiguration()
        self.jobSpecNode.cfgInterface.rawCfg = self.workflowNode.cfgInterface.rawCfg

        # taken from cmssw environment
        # pylint: disable-msg=F0401
        import FWCore.ParameterSet.Types as CfgTypes
        # pylint: enable-msg=F0401

        workingDir = os.path.join(os.getcwd(), 'prestage')
        if os.path.exists(workingDir + '/prestageTFC.xml'):
            rawCfg = pickle.loads(self.jobSpecNode.cfgInterface.rawCfg)
            rawCfg.source.overrideCatalog = CfgTypes.untracked(CfgTypes.string(
                'trivialcatalog_file:%s/prestageTFC.xml?protocol=local-stage-in' % workingDir))
            self.jobSpecNode.cfgInterface.rawCfg = pickle.dumps(rawCfg)

        # Apply site specific customizations
        self.localCustomization(self.jobSpecNode.cfgInterface)

        for inpLink in self.jobSpecNode._InputLinks:
            #  //
            # // We have in-job input links to be resolved
            #//
            self.handleInputLink(self.jobSpecNode.cfgInterface, inpLink)

        cmsProcess = self.jobSpecNode.cfgInterface.makeConfiguration()



        pycfgDump = open("PyCfgFileDump.log", 'w')
        try:
            pycfgDump.write(cmsProcess.dumpPython())
        except Exception, ex:
            msg = "Error writing python format cfg dump:\n"
            msg += "%s\n" % str(ex)
            msg += "This needs to be reported to the framework team"
            pycfgDump.write(msg)
        pycfgDump.close()

        handle = open(cfgFile, 'w')
        handle.write("import pickle\n")
        handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(cmsProcess))
        handle.write("process = pickle.loads(pickledCfg)\n")
        handle.close()

        return
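
The ioCustomization method above only applies site IO settings for CMSSW_2_1_8 and later, comparing versions as tuples of integers rather than as strings. A small illustration of that parsing follows; the version strings are examples only.

import re

def version(version_string):
    # Extract every run of digits from the string and compare numerically
    return tuple(int(part) for part in re.compile(r'(\d+)').findall(version_string))

print version("CMSSW_2_1_8")                  # (2, 1, 8)
print version("CMSSW_2_1_10")                 # (2, 1, 10)
print version("CMSSW_2_1_10") >= (2, 1, 8)    # True: tuple comparison handles 10 > 8 correctly
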
Code Example #25
0
class OfflineDQMSetup:
    """
    _OfflineDQMSetup_

    Generate the PSet for the job on the fly

    """
    def __init__(self):
        self.jobSpec = JobSpec()
        self.jobSpec.load(os.environ['PRODAGENT_JOBSPEC'])
        self.taskState = TaskState(os.getcwd())
        self.taskState.loadRunResDB()
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])

        self.config = self.taskState.configurationDict()

        finder = NodeFinder(self.taskState.taskName())
        self.jobSpec.payload.operate(finder)
        self.jobSpecNode = finder.result

        wffinder = NodeFinder(self.taskState.taskName())
        self.workflowSpec.payload.operate(wffinder)
        self.workflowNode = wffinder.result

        self.inputFiles = self.jobSpecNode.cfgInterface.inputFiles
        self.globalTag = self.jobSpecNode.cfgInterface.conditionsTag
        self.inputDataset = self.jobSpecNode._InputDatasets[0]
        self.runNumber = self.jobSpec.parameters['RunNumber']
        self.scenario = self.jobSpec.parameters.get('Scenario', 'relvalmc')
        self.refHistKey = self.jobSpec.parameters.get('RefHistKey', None)
        

    def __call__(self):
        """
        _operator()_

        Invoke the setup tool

        """
        msg = "Creating Harvesting Configuration for:\n"
        msg += " => Dataset: %s\n" % self.inputDataset.name()
        msg += " => Run Number: %s\n" % self.runNumber
        msg += " => Global Tag: %s\n" % self.globalTag
        msg += " => Input Files:\n" 
        for inputfile in self.inputFiles:
            msg += "    => %s\n" % inputfile
        print msg

        process = self.importConfigurationLibrary()
        
        pycfgDump = open("PyCfgFileDump.log", 'w')
        try:
            pycfgDump.write(process.dumpPython())
        except Exception, ex:
            msg = "Error writing python format cfg dump:\n"
            msg += "%s\n" % str(ex)
            msg += "This needs to be reported to the framework team"
            pycfgDump.write(msg)
        pycfgDump.close()

        #  //
        # // Save the edited config as PSet.py
        #//
        handle = open("PSet.py", 'w')
        handle.write("import pickle\n")
        handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(process))
        handle.write("process = pickle.loads(pickledCfg)\n")
        handle.close()
        print "Wrote PSet.py for harvesting"
        return
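
The PSet.py written here uses the same embed-a-pickle layout as the other runtime tools, so a quick sanity check is to import it back, mirroring the `from PSet import process` import used by RepackerSetup above. This sketch assumes PSet.py sits in the current working directory, as it does in the runtime job area.

import os
import sys

# Make sure the job area (where PSet.py was written) is importable
sys.path.insert(0, os.getcwd())

from PSet import process

print "Recovered process object of type %s from PSet.py" % process.__class__.__name__
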
Code Example #26
0
File: removeWorkflow.py Project: giffels/PRODAGENT
import sys, os

from ProdAgentDB.Config import defaultConfig as dbConfig
from ProdCommon.Database import Session
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdAgentCore.Configuration import loadProdAgentConfiguration

from MergeSensor.MergeSensorDB import MergeSensorDB
from JobQueue.JobQueueDB import JobQueueDB
import ProdAgent.WorkflowEntities.Aux as WEAux
import ProdAgent.WorkflowEntities.Workflow as WEWorkflow

workflow = sys.argv[1]
workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)

#  //
# // Clean out job cache
#//
config = loadProdAgentConfiguration()
compCfg = config.getConfig("JobCreator")
creatorCache = os.path.expandvars(compCfg['ComponentDir'])

workflowCache = os.path.join(creatorCache, workflowSpec.workflowName())
if os.path.exists(workflowCache):
    os.system("/bin/rm -rf %s" % workflowCache)



Session.set_database(dbConfig)
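
The cache cleanup above shells out to /bin/rm. A pure-Python alternative, shown only as a sketch and not as what the script actually does, is shutil.rmtree; the cache path below is a placeholder.

import os
import shutil

workflowCache = os.path.join("/path/to/JobCreator/ComponentDir", "ExampleWorkflow")   # placeholder

if os.path.exists(workflowCache):
    shutil.rmtree(workflowCache, ignore_errors=True)
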
Code Example #27
0
#!/usr/bin/env python

import os
import pickle
from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWAPILoader import CMSSWAPILoader
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.MCPayloads.DatasetTools import getOutputDatasetsWithPSet

specfile = "/uscms/home/gutsche/CSA08-JetET110-CSA08_S43_S43_rereco_may19_PIC_v1-Workflow.xml"

rawCfgFile = "%s.raw.cfg" % os.path.basename(specfile)
origCfgFile = "%s.orig.cfg" % os.path.basename(specfile)
dbsCfgFile = "%s.dbs.cfg" % os.path.basename(specfile)

spec = WorkflowSpec()
spec.load(specfile)

rawCfg = spec.payload.cfgInterface.rawCfg
originalCfg = spec.payload.cfgInterface.originalCfg

dbsDatasets = getOutputDatasetsWithPSet(spec.payload)

handle = open(dbsCfgFile, 'w')
handle.write(dbsDatasets[0]['PSetContent'])
handle.close()

handle = open(origCfgFile, 'w')
handle.write(originalCfg)
handle.close()

loader = CMSSWAPILoader(os.environ['SCRAM_ARCH'],
Code Example #28
0
File: RuntimeLogArch.py Project: giffels/PRODAGENT
class LogArchMgr:


    def __init__(self):
        self.state = TaskState(os.getcwd())
        self.state.loadRunResDB()
        self.state.loadJobSpecNode()

        #  //
        # // check for store fail settings
        #//
        self.workflow = WorkflowSpec()
        self.workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])
        self.doStoreFail = self.workflow.parameters.get("UseStoreFail", False)
        if str(self.doStoreFail).lower() == "true":
            self.doStoreFail = True

        self.config = self.state.configurationDict()

        self.inputTasks = self.config.get("InputTasks", [])
#        # TODO: not really sure this is correct; I would think the report
#        # should come from stageOut, but that is missing if cmsRun fails.
#        # What if the cmsRun report is missing - do we want an empty one?
#        self.inputReport = getTaskState(self.inputTasks[0]).getJobReport()

        # iterate over input tasks (in reverse order)
        # find first one with a fjr
        self.inputTask, self.inputReport = None, None
        for taskName in self.inputTasks[::-1]:
            task = getTaskState(taskName)
            report = task.getJobReport()
            if report is None:
                continue
            self.inputTask = task
            self.inputReport = report
            break

        # if got no valid fjr from previous tasks -
        # something must have gone wrong earlier - make our own
        # may need more things set here to make reports mergeable
        if self.inputReport is None:
            self.inputTask = self.state
            self.inputReport = FwkJobReport()

        self.regexps = self.config.get("LogMatchRegexp", [])

        self.doStageOut = True
        doingStageOut = self.config.get("DoStageOut", [])
        if len(doingStageOut) > 0:
            control = doingStageOut[-1]
            if control == "False":
                self.doStageOut = False


        self.workflowSpecId = self.config['WorkflowSpecID'][0]
        self.jobSpecId = self.state.jobSpecNode.jobName


        self.compRegexps = []
        for regexp in self.regexps:
            self.compRegexps.append(re.compile(regexp))


        # TODO: These should now be pulled in from the workflow, not from the config dict
        self.override = False
        soParams = self.config.get('StageOutParameters', {})
        self.override = soParams.has_key("Override")
        self.overrideParams = {}

        if self.override:
            overrideConf = self.config['StageOutParameters']['Override']
            self.overrideParams = {
                "command" : None,
                "option" : None,
                "se-name" : None,
                "lfn-prefix" : None,
                }

            try:
                self.overrideParams['command'] = overrideConf['command'][0]
                self.overrideParams['se-name'] = overrideConf['se-name'][0]
                self.overrideParams['lfn-prefix'] = overrideConf['lfn-prefix'][0]
            except StandardError, ex:
                msg = "Unable to extract Override parameters from config:\n"
                msg += str(self.config['StageOutParameters'])
                raise StageOutInitError(msg)

            if overrideConf.has_key('option'):
                if len(overrideConf['option']) > 0:
                    self.overrideParams['option'] = overrideConf['option'][-1]
                else:
                    self.overrideParams['option'] = ""
Code Example #29
0
File: overrideStageOut.py Project: giffels/PRODAGENT
    raise RuntimeError, msg


for key, val in override.items():
    if val == None:
        msg = "--%s Option Not Provided\n" % key
        msg += "This option is required\n"
        raise RuntimeError, msg

    
if workflowSpec != None:
    if not os.path.exists(workflowSpec):
        msg = "Workflow Spec file Not Found:\n%s\n" % workflowSpec
        raise RuntimeError, msg
    spec = WorkflowSpec()
    spec.load(workflowSpec)
    specFile = workflowSpec

if jobSpec != None:
    if not os.path.exists(jobSpec):
        msg = "Job Spec file Not Found:\n%s\n" % jobSpec
        raise RuntimeError, msg

    spec = JobSpec()
    spec.load(jobSpec)
    specFile = jobSpec

allNames = listAllNames(spec.payload)
if stageOutNode not in allNames:
    msg = "Error: Cannot find Node named %s in spec\n" % stageOutNode
    msg += "Node names are: %s" % allNames