Code Example #1
File: RuntimePSetPrep.py Project: giffels/PRODAGENT
class JobSpecExpander:

    def __init__(self, jobSpecFile):
        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpecFile)
        self.taskState = TaskState(os.getcwd())
        self.taskState.loadRunResDB()
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])
        
        self.config = self.taskState.configurationDict()

        finder = NodeFinder(self.taskState.taskName())
        self.jobSpec.payload.operate(finder)
        self.jobSpecNode = finder.result

        wffinder = NodeFinder(self.taskState.taskName())
        self.workflowSpec.payload.operate(wffinder)
        self.workflowNode = wffinder.result

        if self.jobSpecNode.jobType != "Merge":
            if self.config.has_key('Configuration'):
                try:
                    self.createPSet()
                except Exception, ex:
                    msg = "Unable to generate cmsRun Config from JobSpec:\n"
                    msg += str(ex)
                    print msg
                    badfile = open("exit.status", 'w')
                    badfile.write("10040")
                    badfile.close()
        else:
Code Example #2
File: CleanUpTools.py Project: PerilousApricot/CRAB2
def createCleanupWorkflowSpec():
    """
    _createCleanupWorkflowSpec_

    Create a generic cleanup WorkflowSpec definition
    that can be used to generate a sandbox for cleanup jobs

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")
    workflow = WorkflowSpec()
    workflow.setWorkflowName("CleanUp-%s" % timestamp)
    workflow.setActivity("CleanUp")
    workflow.setRequestCategory("mc-cleanup")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType']="CleanUp"
    

    cleanUp = workflow.payload
    cleanUp.name = "cleanUp1"
    cleanUp.type = "CleanUp" 
    
    cleanUp.application["Project"] = ""
    cleanUp.application["Version"] = ""
    cleanUp.application["Architecture"] = ""
    cleanUp.application["Executable"] = "RuntimeCleanUp.py" # binary name
    cleanUp.configuration = ""
    cleanUp.cfgInterface = None


    return workflow
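Usage sketch (illustrative, not from the original sources): the helper above returns an ordinary WorkflowSpec, so a caller would typically persist it with the save() method seen in Code Example #15; the file name below is a placeholder.

# Hedged usage sketch for createCleanupWorkflowSpec(); the file name is illustrative only.
cleanupSpec = createCleanupWorkflowSpec()
specFile = "%s-Workflow.xml" % cleanupSpec.workflowName()   # e.g. CleanUp-<timestamp>-Workflow.xml
cleanupSpec.save(specFile)                                  # WorkflowSpec.save() as used in Code Example #15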
Code Example #3
File: RequestSpec.py Project: jlexternal/WPrime13TeV
    def load(self, improvNode):
        """
        _load_

        Extract information for this object from the improv instance provided
        """
        wfQuery = IMProvQuery("/RequestSpec/WorkflowSpec")
        wfnode = wfQuery(improvNode)[0]
        wfspec = WorkflowSpec()
        wfspec.loadFromNode(wfnode)
        self.workflow = wfspec

        policyQuery = IMProvQuery("/RequestSpec/Policies/*")
        detailQuery = IMProvQuery("/RequestSpec/RequestDetails/*")
        preferredPAQuery = IMProvQuery("/RequestSpec/PreferredPA")

        policies = policyQuery(improvNode)
        details = detailQuery(improvNode)
        preferredPAs = preferredPAQuery(improvNode)

        for policy in policies:
            self.policies[str(policy.name)] = str(policy.chardata)

        for detail in improvNode.attrs.keys():
            self.requestDetails[detail] = str(improvNode.attrs[detail])

        for preferredPA in preferredPAs:
            self.preferredPAs[str(preferredPA.attrs['id'])] = \
                str(preferredPA.attrs['priority'])

        return
Code Example #4
File: RequestSpec.py Project: PerilousApricot/CRAB2
    def load(self, improvNode):
        """
        _load_

        Extract information for this object from the improv instance provided
        """
        wfQuery = IMProvQuery("/RequestSpec/WorkflowSpec")
        wfnode = wfQuery(improvNode)[0]
        wfspec = WorkflowSpec()
        wfspec.loadFromNode(wfnode)
        self.workflow = wfspec

        policyQuery = IMProvQuery("/RequestSpec/Policies/*")
        detailQuery = IMProvQuery("/RequestSpec/RequestDetails/*")
        preferredPAQuery = IMProvQuery("/RequestSpec/PreferredPA")

        policies = policyQuery(improvNode)
        details = detailQuery(improvNode)
        preferredPAs = preferredPAQuery(improvNode)

        for policy in policies:
            self.policies[str(policy.name)] = str(policy.chardata)

        for detail in improvNode.attrs.keys():
            self.requestDetails[detail] = str(improvNode.attrs[detail])
 
        for preferredPA in preferredPAs:
            self.preferredPAs[str(preferredPA.attrs['id'])] = \
                str(preferredPA.attrs['priority'])

        return
Code Example #5
def getCMSSoft(work,reverse=False):
    """
    opens the workflowfile and gets the CMSSoft version
    if reverse, returns a map between CMSSoft version and real workflowname
    """

    new_work={}
    workflowSpec = WorkflowSpec()
    for fil in work:
        try:
            workflowSpec.load(fil)
            cmssw=workflowSpec.payload.application['Version']
            name=workflowSpec.parameters['WorkflowName']
            if reverse:
                if not new_work.has_key(cmssw):
                    new_work[cmssw]=[]
                # append every workflow name seen for this CMSSW version
                new_work[cmssw].append(name)
            else:
                new_work[name]=cmssw
        except:
            """
            something went wrong

            """
            msg="WorkflowConstraints getCMSSoft: something went wrong while handling file "+fil
            print(msg)

    return new_work
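Usage sketch (illustrative): getCMSSoft() can be used in both directions; the workflowFiles list below is an assumed input of workflow spec XML paths.

# Hedged usage sketch; `workflowFiles` is an assumed list of workflow spec files.
workflowFiles = ["Workflow1-Workflow.xml", "Workflow2-Workflow.xml"]
versionByName = getCMSSoft(workflowFiles)                  # {workflowName: cmsswVersion}
namesByVersion = getCMSSoft(workflowFiles, reverse=True)   # {cmsswVersion: [workflowName, ...]}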
Code Example #6
File: WatchedDatasets.py Project: giffels/PRODAGENT
    def add(self, workflowFile):
        """
        _add_
        
        Add a dataset to the list of watched datasets.
        
        Arguments:
            
          workflowFile -- the workflow specification file
        
        Return:
            
          the datasetId

        """

        # read the WorkflowSpecFile
        try:
            wfile = WorkflowSpec()
            wfile.load(workflowFile)

        # wrong dataset file
        except Exception, msg:
            raise InvalidDataset, \
                  "Error loading workflow specifications from %s" % workflowFile
Code Example #7
File: RelValSpecMgr.py Project: giffels/PRODAGENT
    def makeJobs(self, testInstance):
        """
        _makeJobs_

        Create Job Specs for the test instance provided

        """
        logging.info("Creating Jobs for test %s at site %s" % (
            testInstance['Name'],
            testInstance['Site'])
                     )
        testName = testInstance['WorkflowSpecId']
        specInstance = WorkflowSpec()
        specInstance.load(testInstance['WorkflowSpecFile'])

        if testInstance['InputDataset'] == None:
            initialRun = self.jobCounts.get(testInstance['Name'], 1)
            factory = RequestJobFactory(
                specInstance,
                testInstance['WorkingDir'],
                testInstance['TotalEvents'],
                InitialRun = initialRun,
                EventsPerJob = testInstance['EventsPerJob'],
                Sites = [testInstance['Site']])

            jobsList = factory()
            self.jobCounts[testInstance['Name']] += len(jobsList)
        else:

            factory = DatasetJobFactory(
                specInstance,
                testInstance['WorkingDir'],
                specInstance.parameters['DBSURL'],
                )

            jobsList = factory()
            self.jobCounts[testInstance['Name']] += len(jobsList)


        msg = "Created %s jobs:\n" % len(jobsList)

        for job in jobsList:
            jobSpecFile = job['JobSpecFile']
            jobSpecId = job['JobSpecId']
            msg += "  %s\n" % jobSpecId
            testInstance['JobSpecs'][jobSpecId] = jobSpecFile



        logging.info(msg)



        return
Code Example #8
File: InjectTestLCG.py Project: giffels/PRODAGENT
def GoodWorkflow(workflow):
   """
   Check if workflow can be loaded
   """
   RequestDir,firstrun = getRequestInjectorConfig()
   workflowCache="%s/WorkflowCache"%RequestDir
   workflowSpec = WorkflowSpec()
   try:
      workflowSpec.load(workflow)
   except:
      return False
   return True
Code Example #9
File: PluginInterface.py Project: giffels/PRODAGENT
    def loadWorkflow(self, specFile):
        """
        _loadWorkflow_

        Helper method, since every plugin will have to do
        something with a workflow

        """
        spec = WorkflowSpec()
        try:
            spec.load(specFile)
        except Exception, ex:
            msg = "Unable to read workflow spec file:\n%s\n" % specFile
            msg += str(ex)
            raise RuntimeError, msg
Code Example #10
class FactoryInterface:
    """
    _FactoryInterface_

    JobSpec Factory Interface definition & common utils for
    all job spec factory generators

    """
    def __init__(self, workflowSpec):
        # or use isinstance(WorkflowSpec) if need to include sub classes
        if workflowSpec.__class__ is WorkflowSpec:
            self.workflow = workflowSpec
        else:
            self.workflow = WorkflowSpec()
            self.workflow.load(workflowSpec)
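Usage sketch (illustrative): the constructor above accepts either an already-loaded WorkflowSpec instance or a path to a spec file; in practice a subclass of FactoryInterface would be instantiated, and the import matches Code Example #27.

# Hedged sketch of the two construction paths accepted by FactoryInterface.
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec  # import as in Code Example #27

spec = WorkflowSpec()
spec.load("SomeWorkflow-Workflow.xml")                    # illustrative spec file path
factoryA = FactoryInterface(spec)                         # pass a loaded WorkflowSpec instance
factoryB = FactoryInterface("SomeWorkflow-Workflow.xml")  # or pass the spec file path directly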
Code Example #11
 def __init__(self, workflowSpec):
     # or use isinstance(WorkflowSpec) if need to include sub classes
     if workflowSpec.__class__ is WorkflowSpec:
         self.workflow = workflowSpec
     else:
         self.workflow = WorkflowSpec()
         self.workflow.load(workflowSpec)
Code Example #12
def createProductionWorkflow(prodName,
                             cmsswVersion,
                             cfgFile=None,
                             category="mc",
                             **args):
    """
    _createProductionWorkflow_

    Create a Production style workflow, i.e. generation of new events

    """

    timestamp = int(time.time())
    if args.get("PyCfg", None) == None:
        if cfgFile == None:
            msg = "Error: No Cfg File or python cfg file provided to createProductionWorkflow"
            raise RuntimeError, msg
        pycfgFile = createPythonConfig(cfgFile)
        pycfgFileContent = file(pycfgFile).read()
    else:
        pycfgFileContent = args['PyCfg']

    if args.get("PSetHash", None) == None:
        realPSetHash = createPSetHash(cfgFile)
    else:
        realPSetHash = args['PSetHash']

    #  //
    # // Create a new WorkflowSpec and set its name
    #//
    spec = WorkflowSpec()
    workflowname = "%s__%s-%s-%s-%s" % (
        prodName, cmsswVersion, args.get("processingLabel", "Test07"),
        args.get("physicsGroup", "NoPhysicsGroup"), timestamp)
    spec.setWorkflowName(workflowname)
    spec.setRequestCategory(category)
    spec.setRequestTimestamp(timestamp)

    cmsRun = spec.payload
    populateCMSRunNode(cmsRun,
                       "cmsRun1",
                       cmsswVersion,
                       pycfgFileContent,
                       realPSetHash,
                       timestamp,
                       prodName,
                       physicsGroup=args.get("physicsGroup", "NoPhysicsGroup"),
                       processingLabel=args.get("processingLabel", "Test07"),
                       fakeHash=args.get("FakeHash", False))

    addStageOutNode(cmsRun, "stageOut1")
    generateFilenames(spec)
    return spec
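Usage sketch (illustrative): createProductionWorkflow() can be driven either from a cfg file on disk or from pre-built PyCfg/PSetHash keyword arguments; every literal value below is a placeholder.

# Hedged sketch: the two call styles accepted by createProductionWorkflow(); values are placeholders.
specFromFile = createProductionWorkflow("MyProduction", "CMSSW_3_8_4",
                                        cfgFile="production.cfg")

# When PyCfg is supplied, PSetHash should be supplied as well, otherwise the
# function falls back to createPSetHash(cfgFile) with cfgFile=None.
specFromArgs = createProductionWorkflow("MyProduction", "CMSSW_3_8_4",
                                        PyCfg=open("production_cfg.py").read(),
                                        PSetHash="deadbeef",
                                        physicsGroup="Individual",
                                        processingLabel="Test07")
specFromArgs.save("MyProduction-Workflow.xml")            # WorkflowSpec.save() as in Code Example #15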
Code Example #13
File: ResultsStatus.py Project: giffels/PRODAGENT
    def __init__(self, config, msgSvcRef,  **workflowDetails):
        self.configuration = config
        self.msgSvcRef = msgSvcRef
        self.workflowDetails = workflowDetails
        self.workflow = workflowDetails['id']
        self.workflowFile = workflowDetails['workflow_spec_file']
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(self.workflowFile)

        self.doMigration = self.configuration.get("MigrateToGlobal", True)
        self.doInjection = self.configuration.get("InjectToPhEDEx", True)
Code Example #14
def createLogCollectorWorkflowSpec(wf):
    """
    _createLogCollectorWorkflowSpec_

    Create a generic LogArchive WorkflowSpec definition

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")
    workflow = WorkflowSpec()
    workflow.setWorkflowName("LogCollect-%s" % timestamp)
    workflow.setActivity("LogCollect")
    workflow.setRequestCategory("logcollect")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters["WorkflowType"] = "LogCollect"

    logArchive = workflow.payload
    logArchive.name = "logCollect1"
    logArchive.type = "LogCollect"
    # TODO: remove this?
    # logArchive.workflow = wf
    logArchive.configuration
    logArchive.application["Project"] = ""
    logArchive.application["Version"] = ""
    logArchive.application["Architecture"] = ""
    logArchive.application["Executable"] = "RuntimeLogCollector.py"  # binary name
    logArchive.configuration = ""
    logArchive.cfgInterface = None

    # set stageOut override
    # cfg = IMProvNode("config")
    # stageOut = IMProvNode("StageOutParameters")
    # cfg.addNode()
    # WorkflowTools.addStageOutNode(logArchive, "StageOut1")
    # WorkflowTools.addStageOutOverride(logArchive, stageOutParams['command'],
    #                                  stageOutParams['option'],
    #                                  stageOutParams['se-name'],
    #                                  stageOutParams['lfnPrefix'])

    return workflow
Code Example #15
File: T0ASTPlugin.py Project: giffels/PRODAGENT
    def createWorkflow(self, runNumber, primaryDataset,
                       processedDataset, dataTier):
        """
        _createWorkflow_

        Create a workflow for a given run and primary dataset.  If the workflow
        has been created previously, load it and use it.
        """
        jobCache = os.path.join(self.args["ComponentDir"], "T0ASTPlugin",
                                "Run" + runNumber)
        if not os.path.exists(jobCache):
            os.makedirs(jobCache)

        workflowSpecFileName = "DQMHarvest-Run%s-%s-workflow.xml" % (runNumber, primaryDataset)
        workflowSpecPath = os.path.join(jobCache, workflowSpecFileName)

        if os.path.exists(workflowSpecPath):
            msg = "Loading existing workflow for dataset: %s\n " % primaryDataset
            msg += " => %s\n" % workflowSpecPath
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowSpecPath)
            return (workflowSpec, workflowSpecPath)
            
        msg = "No workflow found for dataset: %s\n " % primaryDataset
        msg += "Looking up software version and generating workflow..."

        recoConfig = self.t0astWrapper.listRecoConfig(runNumber, primaryDataset)

        if not recoConfig["DO_RECO"]:
            logging.info("RECO disabled for dataset %s" % primaryDataset)
            return (None, None)

        globalTag = self.args.get("OverrideGlobalTag", None)
        if globalTag == None:
            globalTag = recoConfig["GLOBAL_TAG"]
            
        cmsswVersion = self.args.get("OverrideCMSSW", None)
        if cmsswVersion == None:
            cmsswVersion = recoConfig["CMSSW_VERSION"]

        datasetPath = "/%s/%s/%s" % (primaryDataset, processedDataset, dataTier)
        workflowSpec = createHarvestingWorkflow(datasetPath, self.site, 
                                                self.args["CmsPath"],
                                                self.args["ScramArch"],
                                                cmsswVersion, globalTag,
                                                configFile=self.args["ConfigFile"],
                                                DQMServer=self.args['DQMServer'],
                                                proxyLocation=self.args['proxyLocation'],
                                                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                                                doStageOut=self.args['DoStageOut'])
        
        
        workflowSpec.save(workflowSpecPath)
        msg = "Created Harvesting Workflow:\n %s" % workflowSpecPath
        logging.info(msg)
        self.publishWorkflow(workflowSpecPath, workflowSpec.workflowName())
        return (workflowSpec, workflowSpecPath)
Code Example #16
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Generate a workflow.  If the self.configFile parameter has been set
        this will attempt to load the config from file, otherwise it will
        create an empty process object which will get filled in by the runtime
        script.
        """
        self.timestamp = int(time.time())
        self.workflow = WorkflowSpec()
        self.workflowName = "AlcaSkim-Run%s-%s" % \
                            (self.run, self.primaryDataset)

        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters["WorkflowType"] = "Processing"
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"
        self.cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        self.cmsRunNode.application["Executable"] = "cmsRun"
        self.cmsRunNode.application["Project"] = "CMSSW"
        self.cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        inputDataset = self.cmsRunNode.addInputDataset(self.primaryDataset,
                                                       self.parentProcessedDataset)
        inputDataset["DataTier"] = "RECO"
        
        if self.configFile == None:
            self.loadProcessFromFramework()            
        else:
            self.loadProcessFromFile()
            
        self.setupOutputModules()

        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow
Code Example #17
def createJobSpec(jobSpecId,
                  workflowSpecFile,
                  filename,
                  runNumber,
                  eventCount,
                  firstEvent=None,
                  saveString=False,
                  loadString=True):

    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)

    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
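Usage sketch (illustrative): createJobSpec() defaults to loadString=True, so when a workflow spec file path is passed (rather than its serialised content) loadString must be set to False; all argument values below are placeholders.

# Hedged sketch of calling createJobSpec(); every value is a placeholder.
createJobSpec("ProcessingJob-Run1234-1",             # jobSpecId
              "MyWorkflow-Workflow.xml",              # workflowSpecFile (a path here)
              "ProcessingJob-Run1234-1-JobSpec.xml",  # filename the JobSpec is saved to
              1234,                                   # runNumber
              1000,                                   # eventCount
              firstEvent=1,
              loadString=False)                       # load the spec from file, not from a string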
Code Example #18
    def __init__(self, jobSpecFile):
        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpecFile)
        self.taskState = TaskState(os.getcwd())
        self.taskState.loadRunResDB()
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])

        self.config = self.taskState.configurationDict()

        finder = NodeFinder(self.taskState.taskName())
        self.jobSpec.payload.operate(finder)
        self.jobSpecNode = finder.result

        wffinder = NodeFinder(self.taskState.taskName())
        self.workflowSpec.payload.operate(wffinder)
        self.workflowNode = wffinder.result

        tier0Merge = self.workflowSpec.parameters.get("Tier0Merge", "False")

        if self.jobSpecNode.jobType != "Merge" or tier0Merge == "True":

            if self.config.has_key('Configuration'):
                #try:
                self.createPSet()
                #except Exception, ex:
                #    msg = "Unable to generate cmsRun Config from JobSpec:\n"
                #    msg += str(ex)
                #    print msg
                #    badfile = open("exit.status", 'w')
                #    badfile.write("10040")
                #    badfile.close()

        else:
            #  //
            # // Merge job
            #//
            self.createMergePSet()

        # do after pset created to get correct input files
        self.setJobDetails()
        if self.config.has_key('UserSandbox'):
             self.userSandbox()
Code Example #19
def createProductionWorkflow(prodName, cmsswVersion, cfgFile = None,
                             category = "mc", **args):
    """
    _createProductionWorkflow_

    Create a Production style workflow, i.e. generation of new events

    """

    timestamp = int(time.time())
    if args.get("PyCfg", None) == None:
        if cfgFile == None:
            msg = "Error: No Cfg File or python cfg file provided to createProductionWorkflow"
            raise RuntimeError, msg
        pycfgFile = createPythonConfig(cfgFile)
        pycfgFileContent = file(pycfgFile).read()
    else:
        pycfgFileContent = args['PyCfg']



    if args.get("PSetHash", None) == None:
        realPSetHash = createPSetHash(cfgFile)
    else:
        realPSetHash = args['PSetHash']


    #  //
    # // Create a new WorkflowSpec and set its name
    #//
    spec = WorkflowSpec()
    workflowname = "%s__%s-%s-%s-%s" % (
        prodName, cmsswVersion, args.get("processingLabel", "Test07"),
        args.get("physicsGroup", "NoPhysicsGroup"), timestamp)
    spec.setWorkflowName(workflowname)
    spec.setRequestCategory(category)
    spec.setRequestTimestamp(timestamp)

    cmsRun = spec.payload
    populateCMSRunNode(cmsRun,
                       "cmsRun1",
                       cmsswVersion,
                       pycfgFileContent,
                       realPSetHash,
                       timestamp,
                       prodName,
                       physicsGroup=args.get("physicsGroup", "NoPhysicsGroup"),
                       processingLabel=args.get("processingLabel", "Test07"),
                       fakeHash=args.get("FakeHash", False))


    addStageOutNode(cmsRun, "stageOut1")
    generateFilenames(spec)
    return spec
Code Example #20
File: RequestIterator.py Project: giffels/PRODAGENT
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.count = 0
        self.runIncrement = 1
        self.currentJob = None
        self.sitePref = None
        self.pileupDatasets = {}
        self.ownedJobSpecs = {}
        
        #  //
        # // Initially hard coded, should be extracted from Component Config
        #//
        self.eventsPerJob = 10 
        
        self.workflowSpec = WorkflowSpec()
        try:
            self.workflowSpec.load(workflowSpecFile)
        except:
            logging.error("ERROR Loading Workflow: %s " % (workflowSpecFile))
            return

        if self.workflowSpec.parameters.get("RunIncrement", None) != None:
            self.runIncrement = int(
                self.workflowSpec.parameters['RunIncrement']
                )

    
        self.generators = GeneratorMaker()
        self.workflowSpec.payload.operate(self.generators)
        
        
        
        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" %self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
Code Example #21
File: EventJobSpec.py Project: PerilousApricot/CRAB2
def createJobSpec(jobSpecId, workflowSpecFile, filename, runNumber, eventCount,
                  firstEvent=None, saveString=False, loadString=True):

    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (
        workflowSpec.workflowName(),
        runNumber
            )


    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent != None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:    
       return jobSpec.saveString()
    jobSpec.save(filename)
    return
Code Example #22
    def __init__(self, workflowSpec, jobSpec):

        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpec)

        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpec)

        taskState = TaskState(os.getcwd())
        taskState.loadRunResDB()

        jobSpecFinder = NodeFinder(taskState.taskName())
        self.jobSpec.payload.operate(jobSpecFinder)
        self.jobSpecNode = jobSpecFinder.result

        workflowFinder = NodeFinder(taskState.taskName())
        self.workflowSpec.payload.operate(workflowFinder)
        self.workflowNode = workflowFinder.result

        self.run = None
        self.lumis = []
        self.streamerFiles = []
        self.activeDatasets = []
Code Example #23
    def __init__(self):
        self.jobSpec = JobSpec()
        self.jobSpec.load(os.environ['PRODAGENT_JOBSPEC'])
        self.taskState = TaskState(os.getcwd())
        self.taskState.loadRunResDB()
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(os.environ["PRODAGENT_WORKFLOW_SPEC"])

        self.config = self.taskState.configurationDict()

        finder = NodeFinder(self.taskState.taskName())
        self.jobSpec.payload.operate(finder)
        self.jobSpecNode = finder.result

        wffinder = NodeFinder(self.taskState.taskName())
        self.workflowSpec.payload.operate(wffinder)
        self.workflowNode = wffinder.result

        self.inputFiles = self.jobSpecNode.cfgInterface.inputFiles
        self.globalTag = self.jobSpecNode.cfgInterface.conditionsTag
        self.inputDataset = self.jobSpecNode._InputDatasets[0]
        self.runNumber = self.jobSpec.parameters['RunNumber']
        self.scenario = self.jobSpec.parameters.get('Scenario', 'relvalmc')
        self.refHistKey = self.jobSpec.parameters.get('RefHistKey', None)
Code Example #24
class WorkflowMaker:
    """
    _WorkflowMaker_

    Basic MC workflow maker for PR to use to create workflow spec files.
    
    """
    def __init__(self, requestId, channel, label):
        self.requestId = requestId
        self.group = None
        self.label = label
        self.timestamp = int(time.time())
        self.channel = channel
        self.cmsswVersions = []
        self.configurations = []
        self.psetHashes = {}
        self.origCfgs = {}
        self.acquisitionEra = None
        self.processingString = None
        self.processingVersion = None
        self.conditions = None

        # turn on use of proper naming convention for datasets
        # should be made the default soon, lets deprecate all the old crap
        self.useProperNamingConventions = False

        self.options = {}
        self.options.setdefault('FakeHash', False)

        # Should we use another attribute for setting the output dataset
        # status in DBS?
        self.outputDatasetStatus = 'VALID'

        self.inputDataset = {}
        self.inputDataset['IsUsed'] = False
        self.inputDataset['DatasetName'] = None
        self.inputDataset['Primary'] = None
        self.inputDataset['Processed'] = None
        self.inputDataset['DataTier'] = None
        #  //
        # // Extra controls over input dataset if required
        #//
        self.inputDataset['SplitType'] = None
        self.inputDataset['SplitSize'] = None
        self.inputDataset['OnlySites'] = None
        self.inputDataset['OnlyBlocks'] = None
        self.inputDataset['OnlyClosedBlocks'] = True

        #  //
        # // Pileup Dataset controls
        #//
        self.pileupDatasets = []

        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflowName = "%s-%s-%s" % (label, channel, requestId)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("mc")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters['RequestLabel'] = self.label
        self.workflow.parameters['ProdRequestID'] = self.requestId

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"

        self.cmsRunNodes = [self.cmsRunNode]
        self.saveOutputFor = []

    def chainCmsRunNode(self, stageOutIntermediates=False, *outputModules):
        """
        append a cmsRun config to the current cmsRun node and chain them
        """
        if stageOutIntermediates:  #Do we want to keep cmsRunNode's products?
            self.saveOutputFor.append(self.cmsRunNode.name)
        newnode = self.cmsRunNode.newNode("cmsRun%s" %
                                          (len(self.cmsRunNodes) + 1))
        newnode.type = "CMSSW"
        if not outputModules:
            outputModules = self.configurations[-1].outputModules.keys()
        for outmodule in outputModules:
            newnode.addInputLink(self.cmsRunNode.name,
                                 outmodule,
                                 'source',
                                 AppearStandalone=not stageOutIntermediates)
        self.cmsRunNode = newnode
        self.cmsRunNodes.append(newnode)

    def changeCategory(self, newCategory):
        """
        _changeCategory_

        Change the workflow category from the default mc
        that appears in the LFNs

        """
        self.workflow.setRequestCategory(newCategory)
        return

    def setAcquisitionEra(self, era):
        """
        _setAcquisitionEra_
        
        Sets the AcquisitionEra in the workflow 

        """
        self.workflow.setAcquisitionEra(era)
        self.acquisitionEra = era
        return

    def setNamingConventionParameters(self, era, procString, procVers):
        """
        _setNamingConventionParameters_

        Sets AcquisitionEra, ProcessingString and ProcessingVersion

        """
        self.workflow.setAcquisitionEra(era)
        self.workflow.parameters['ProcessingString'] = procString
        self.workflow.parameters['ProcessingVersion'] = procVers

        self.acquisitionEra = era
        self.processingString = procString
        self.processingVersion = procVers

        self.useProperNamingConventions = True

        return

    def setActivity(self, activity):
        """
        _setActivity_
        
        Set the workflow type
        i.e. Simulation, Reconstruction, Reprocessing, Skimming
        """
        self.workflow.setActivity(activity)
        return

    def setCMSSWVersion(self, version):
        """
        _setCMSSWVersion_

        Set the version of CMSSW to be used

        """
        self.cmsswVersions.append(version)
        self.cmsRunNode.application['Version'] = version
        self.cmsRunNode.application['Executable'] = "cmsRun"
        self.cmsRunNode.application['Project'] = "CMSSW"
        self.cmsRunNode.application['Architecture'] = ""
        return

    def setUserSandbox(self, sandboxloc):
        """
        _setUserSandbox_
        Sets the location of the user sandbox

        """
        self.cmsRunNode.userSandbox = sandboxloc
        return

    def setPhysicsGroup(self, group):
        """
        _setPhysicsGroup_

        Physics Group owning the workflow

        """
        self.group = group
        self.workflow.parameters['PhysicsGroup'] = self.group
        return

    def setConfiguration(self, cfgFile, **args):
        """
        _setConfiguration_

        Provide the CMSSW configuration to be used.
        By default, assume that cfgFile is a python format string.

        The format & type can be specified using args:

        - Type   : must be "file" or "string" or "instance"
        
        """
        cfgType = args.get("Type", "instance")

        if cfgType not in ("file", "string", "instance"):
            msg = "Illegal Type for cfg file: %s\n" % cfgType
            msg += "Should be \"file\", \"string\" or \"instance\"\n"
            raise RuntimeError, msg

        cfgContent = cfgFile
        if cfgType == "file":
            cfgContent = file(cfgFile).read()
            cfgType = "string"

        if cfgType == "string":
            cfgData = cfgContent
            cfgContent = CMSSWConfig()
            cfgContent.unpack(cfgData)

        self.cmsRunNode.cfgInterface = cfgContent
        self.configurations.append(cfgContent)
        return

    def setOriginalCfg(self, honkingGreatString):
        """
        _setOriginalCfg_

        Set the original cfg file content that is to be recorded in DBS

        CALL THIS METHOD AFTER setConfiguration
        
        """
        sep = '\n\n### Next chained config file ###\n\n'
        cfg = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_config = self.origCfgs.get(link['InputNode'], '')
                if prev_config:
                    cfg = '%s%s%s' % (cfg, prev_config, sep)
        cfg = '%s%s' % (cfg, honkingGreatString)
        self.cmsRunNode.cfgInterface.originalCfg = cfg
        self.origCfgs[self.cmsRunNode.name] = cfg
        return

    def setPSetHash(self, hashValue):
        """
        _setPSetHash_

        Set the value for the PSetHash
        
        If any InputLinks are present their pset hashes are prepended

        """
        hash = ''
        for link in self.cmsRunNode._InputLinks:
            if link['AppearStandalone']:
                prev_node_hash = self.psetHashes.get(link['InputNode'], None)
                if prev_node_hash:  # cmsGen nodes will be missing
                    hash = '%s%s_' % (hash, prev_node_hash)
        hash = '%s%s' % (hash, hashValue)
        self.psetHashes[self.cmsRunNode.name] = hash
        return

    def addInputDataset(self, datasetPath):
        """
        _addInputDataset_

        If this workflow processes a dataset, set that here

        NOTE: It is possible to also specify
            - Split Type (file or event)
            - Split Size (int)
            - input DBS
        Not sure how many of these we want to use.
        For now, they can be added to the inputDataset dictionary
        """
        datasetBits = DatasetConventions.parseDatasetPath(datasetPath)
        self.inputDataset.update(datasetBits)
        self.inputDataset['IsUsed'] = True
        self.inputDataset['DatasetName'] = datasetPath

        return

    def addPileupDataset(self, datasetName, filesPerJob=10, targetModule=None):
        """
        _addPileupDataset_

        Add a dataset to provide pileup overlap.
        filesPerJob should be 1 in 99.9 % of cases

        """
        pileupDataset = {}
        pileupDataset['Primary'] = None
        pileupDataset['Processed'] = None
        pileupDataset['DataTier'] = None
        datasetBits = DatasetConventions.parseDatasetPath(datasetName)
        pileupDataset.update(datasetBits)
        pileupDataset['FilesPerJob'] = filesPerJob
        # Target module could be 'MixingModule' or 'DataMixingModule' for
        # the moment. If None, MixingModule will be used.
        pileupDataset['TargetModule'] = targetModule
        self.pileupDatasets.append(pileupDataset)
        return

    def addFinalDestination(self, *phedexNodeNames):
        """
        _addFinalDestination_

        Add a final destination that can be used to generate
        a PhEDEx subscription so that the data gets transferred to
        some final location.

        NOTE: Do we want to support a list of PhEDEx nodes? Eg CERN + FNAL

        """
        nameList = ""
        for nodeName in phedexNodeNames:
            nameList += "%s," % nodeName
        nameList = nameList[:-1]
        self.workflow.parameters['PhEDExDestination'] = nameList
        return

    def addSelectionEfficiency(self, selectionEff):
        """
        _addSelectionEfficiency_

        Do we have a selection efficiency?

        """

        self.cmsRunNode.applicationControls["SelectionEfficiency"] = \
                                                             selectionEff
        return

    def setOutputDatasetDbsStatus(self, status):
        """
        _setOutputDatasetDbsStatus_

        The output datasets will have this status in the field dataset.status.
        This value will be used when registering the output dataset in DBS.

            Only two values are accepted:
            - VALID
            - PRODUCTION

        """

        if status in ('VALID', 'PRODUCTION'):
            self.outputDatasetStatus = status

        return

    def makeWorkflow(self):
        """
        _makeWorkflow_

        Call this method to create the workflow spec instance when
        done

        """
        self._Validate()

        #  //
        # // Add Stage Out node
        #//
        self.saveOutputFor.append(self.cmsRunNode.name)
        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1",
                                      *self.saveOutputFor)
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

        #  //
        # // Input Dataset?
        #//
        if self.inputDataset['IsUsed']:
            inputDataset = self.cmsRunNodes[0].addInputDataset(
                self.inputDataset['Primary'], self.inputDataset['Processed'])
            inputDataset["DataTier"] = self.inputDataset['DataTier']
            for keyname in [
                    'SplitType',
                    'SplitSize',
                    'OnlySites',
                    'OnlyBlocks',
                    'OnlyClosedBlocks',
            ]:
                if self.inputDataset[keyname] != None:
                    self.workflow.parameters[keyname] = self.inputDataset[
                        keyname]

        #  //
        # // Pileup Datasets?
        #//
        for pileupDataset in self.pileupDatasets:
            puDataset = self.cmsRunNodes[0].addPileupDataset(
                pileupDataset['Primary'], pileupDataset['DataTier'],
                pileupDataset['Processed'])
            puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
            if pileupDataset['TargetModule'] is not None:
                puDataset['TargetModule'] = pileupDataset['TargetModule']

        #  //
        # // Extract dataset info from cfg
        #//
        datasets = {}
        datasetsToForward = {}
        for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

            # Ignore nodes that don't save any output. But keep input dataset
            # in case we need to forward it.
            if cmsRunNode.name not in self.saveOutputFor:
                # Store parent dataset in case we need to forward it.
                if self.inputDataset['IsUsed'] and \
                                            cmsRunNode == self.cmsRunNodes[0]:
                    datasetsToForward[cmsRunNode.name] = \
                                            self.inputDataset['DatasetName']
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        # If the previous cmsRunNode stages out, pull down the
                        # dataset it produced.
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasets['%s:%s' % (inputLink['InputNode'],
                                inputLink['OutputModule'])]
                        # If the previous cmsRunNode does not stage out, then
                        # use its parent.
                        else:
                            # TODO: Won't work if more than one InputLink exists
                            datasetsToForward[cmsRunNode.name] = \
                                datasetsToForward[inputLink['InputNode']]
                continue

            for outModName in config.outputModules.keys():
                moduleInstance = config.getOutputModule(outModName)
                dataTier = moduleInstance['dataTier']
                filterName = moduleInstance["filterName"]
                primaryName = DatasetConventions.primaryDatasetName(
                    PhysicsChannel=self.channel, )

                if self.useProperNamingConventions:
                    if self.processingString and filterName:
                        processingString = "_".join(
                            (self.processingString, filterName))
                    elif self.processingString:
                        processingString = self.processingString
                    elif filterName:
                        processingString = filterName
                    else:
                        processingString = None
                    processedName = DatasetConventions.properProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        ProcessingString=processingString,
                        ProcessingVersion=self.processingVersion,
                        Unmerged=True)
                elif self.acquisitionEra == None:
                    processedName = DatasetConventions.processedDatasetName(
                        Version=cmsRunNode.application['Version'],
                        Label=self.label,
                        Group=self.group,
                        FilterName=filterName,
                        RequestId=self.requestId,
                        Unmerged=True)
                else:
                    processedName = DatasetConventions.csa08ProcessedDatasetName(
                        AcquisitionEra=self.acquisitionEra,
                        Conditions=self.workflow.parameters['Conditions'],
                        ProcessingVersion=self.workflow.
                        parameters['ProcessingVersion'],
                        FilterName=filterName,
                        Unmerged=True)

                dataTier = DatasetConventions.checkDataTier(dataTier)

                moduleInstance['primaryDataset'] = primaryName
                moduleInstance['processedDataset'] = processedName

                outDS = cmsRunNode.addOutputDataset(primaryName, processedName,
                                                    outModName)

                outDS['Status'] = self.outputDatasetStatus
                outDS['DataTier'] = dataTier
                outDS["ApplicationName"] = \
                                         cmsRunNode.application["Executable"]
                outDS["ApplicationFamily"] = outModName
                outDS["PhysicsGroup"] = self.group

                # check for input dataset for first node
                if self.inputDataset[
                        'IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                    outDS['ParentDataset'] = self.inputDataset['DatasetName']
                # check for staged out intermediates
                elif cmsRunNode != self.cmsRunNodes[0]:
                    for inputLink in cmsRunNode._InputLinks:
                        if not inputLink["AppearStandalone"]:
                            # TODO: Won't work if more than one InputLink exists
                            outDS['ParentDataset'] = datasets[
                                '%s:%s' % (inputLink['InputNode'],
                                           inputLink['OutputModule'])]
                        elif datasetsToForward.get(
                                inputLink['InputNode']) is not None:
                            outDS['ParentDataset'] = \
                                    datasetsToForward[inputLink['InputNode']]

                if self.options['FakeHash']:
                    guid = makeUUID()
                    outDS['PSetHash'] = "hash=%s;guid=%s" % \
                            (self.psetHashes[cmsRunNode.name], guid)
                else:
                    outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

                # record output in case used as input to a later node
                datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                                "/%s/%s/%s" % ( outDS['PrimaryDataset'],
                                                  outDS['ProcessedDataset'],
                                                  outDS['DataTier'])

        # optionally remap sibling relationships to parent-child (i.e HLTDEBUG)
        remapParentageForWorkflow(self.workflow)
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow

    def _Validate(self):
        """
        _Validate_

        Private method to test all options are set.

        Throws a WorkflowMakerError if any problems found

        """
        notNoneAttrs = [
            "requestId",
            "label",
            "group",
            "channel",
        ]
        for attrName in notNoneAttrs:
            value = getattr(self, attrName, None)
            if value == None:
                msg = "Attribute Not Set: %s" % attrName
                raise WorkflowMakerError(msg)

        if not len(self.configurations):
            msg = "Attribute Not Set: configurations"
            raise WorkflowMakerError(msg)

        if len(self.configurations) != len(self.cmsswVersions):
            msg = "len(self.configurations) != len(self.cmsswVersions)"
            raise WorkflowMakerError(msg)

        return
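Usage sketch (illustrative, condensed from the setters above): a caller constructs WorkflowMaker, sets the physics group, CMSSW version, configuration and PSet hash, then calls makeWorkflow(); every literal below is a placeholder, and cfgInstance / originalCfgString are assumed to be a prepared CMSSWConfig object and its original cfg text.

# Hedged sketch of driving WorkflowMaker; all literal values are placeholders.
maker = WorkflowMaker("100001", "MinBias", "MyLabel")      # requestId, channel, label
maker.setPhysicsGroup("Individual")
maker.setCMSSWVersion("CMSSW_3_8_4")
maker.setConfiguration(cfgInstance, Type="instance")       # cfgInstance: assumed CMSSWConfig object
maker.setOriginalCfg(originalCfgString)                    # assumed original cfg text for DBS
maker.setPSetHash("deadbeefcafe")
spec = maker.makeWorkflow()
spec.save("%s-Workflow.xml" % maker.workflowName)          # WorkflowSpec.save() as in Code Example #15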
Code Example #25
class RepackerSetup:
    """
    _RepackerSetup_

    Object to manipulate the Configuration files for a repacker job

    - Extract the details of the repacker job entity stored in the
      config

    - Pull in the lumi server information and add it to the config

    """
    def __init__(self, workflowSpec, jobSpec):

        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpec)

        self.jobSpec = JobSpec()
        self.jobSpec.load(jobSpec)

        taskState = TaskState(os.getcwd())
        taskState.loadRunResDB()

        jobSpecFinder = NodeFinder(taskState.taskName())
        self.jobSpec.payload.operate(jobSpecFinder)
        self.jobSpecNode = jobSpecFinder.result

        workflowFinder = NodeFinder(taskState.taskName())
        self.workflowSpec.payload.operate(workflowFinder)
        self.workflowNode = workflowFinder.result

        self.run = None
        self.lumis = []
        self.streamerFiles = []
        self.activeDatasets = []


    def unpackJobEntity(self):
        """
        _unpackJobEntity_

        Get the StreamerJobEntity from the JobSpec node

        """

        repackJobEntity = self.jobSpecNode.cfgInterface.extensions.get('Streamer', None)
        if repackJobEntity == None:
            msg = "No StreamerJobEntity in JobSpec configuration\n"
            msg += "This is required for repacker jobs\n"
            raise RuntimeError, msg

        # Get run and lumi numbers for this job
        self.run = repackJobEntity.data['runNumber']
        self.lumis = repackJobEntity.data['lumiSections']
        print "Repacker Job Handling Run:%s\n LumiSections: %s\n" % (self.run,self.lumis)

        # Sort streamer input by lumi ID for time ordering
        self.streamerFiles = sortByValue(repackJobEntity.data['streamerFiles'])
        msg = "Streamer Files for this job are:\n"
        for strmr in self.streamerFiles:
            msg += "  %s\n" % strmr
        print msg

        # Get list of active datasets for this job
##        self.activeDatasets = repackJobEntity.data['activeOutputModules']
##        msg = "This Job Will repack datasets:\n"
##        for dataset in self.activeDatasets:
##            msg += "  %s\n" % dataset
##        print msg

        return


    def backupPSet(self,filename,process):
        """
        _backupPSet_
        
        Write a backup copy of the current PSet to disk.
        """
        print "Wrote current configurations as %s" % filename
        handle = open(filename, 'w')
        handle.write("import pickle\n")
        handle.write("pickledCfg=\"\"\"%s\"\"\"\n" % pickle.dumps(process))
        handle.write("process = pickle.loads(pickledCfg)\n")
        handle.close()

        return


    def importAndBackupProcess(self):
        """
        _importAndBackupProcess_
        
        Try to import the process object for the job, which is contained in
        PSet.py and save a backup copy of it.
        """
        try:
            from PSet import process
        except ImportError, ex:
            msg = "Failed to import PSet module containing cmsRun Config\n"
            msg += str(ex)
            raise RuntimeError, msg

        print "PSet.py imported"

        self.backupPSet("PSetPreRepack.log",process)

        return process
Code Example #26
class PromptRecoWorkflow(FactoryInterface):
    """
    _PromptRecoWorkflow_

    Factory to build workflows for PromptReco jobs.
    """
    def __init__(self, runNumber, version, cmsPath, scramArch):
        FactoryInterface.__init__(self, version, cmsPath, scramArch)
        self.run = runNumber
        self.workflow = None
        self.timestamp = None
        self.cmsRunNode = None
        self.workflowName = None
        self.configFile = None
        self.useLazyDownload = False
        self.primaryDataset = None
        self.processedDataset = None
        self.parentProcessedDataset = None
        self.acquisitionEra = None
        self.processingVersion = None

    def setConfigFile(self, configFile):
        """
        _setConfigFile_

        Set the config file that will be loaded into the workflow.
        """
        self.configFile = configFile

    def setPrimaryDataset(self, primaryDatasetName):
        """
        _setPrimaryDataset_

        Set the primary dataset that this workflow will run over.
        """
        self.primaryDataset = primaryDatasetName
        return

    def setProcessedDataset(self, processedDatasetName):
        """
        _setProcessedDataset_

        Set the processed dataset that this workflow will produce.
        """
        self.processedDataset = processedDatasetName
        return

    def setParentProcessedDataset(self, parentProcessedDatasetName):
        """
        _setParentProcessedDataset_

        Set the parent processed dataset for this workflow.
        """
        self.parentProcessedDataset = parentProcessedDatasetName
        return    

    def setAcquisitionEra(self, acquisitionEra):
        """
        _setAcquisitionEra_

        Set the acquisition era.
        """
        self.acquisitionEra = acquisitionEra
        return

    def setProcessingVersion(self, processingVersion):
        """
        _setProcessingVersion_

        Set the processing version.
        """
        self.processingVersion = processingVersion
        return

    def setLazyDownload(self, useLazyDownload):
        """
        _setLazyDownload_

        This enables/disables lazy download mode in the framework.
        """
        self.useLazyDownload = useLazyDownload
    
    def setupOutputModule(self, outputModuleName, dataTier):
        """
        _setupOutputModule_

        Create the outputModule and outputDataset sections of the workflow.
        """
        outputDataset = self.cmsRunNode.addOutputDataset(self.primaryDataset,
                                                         self.processedDataset,
                                                         outputModuleName)
        outputDataset["NoMerge"] = "True"
        outputDataset["DataTier"] = dataTier
        outputDataset["ApplicationName"] = "cmsRun"
        outputDataset["ApplicationProject"] = "CMSSW"
        outputDataset["ApplicationVersion"] = self.cmssw["CMSSWVersion"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["ParentDataset"] = "/%s/%s/%s" % (self.primaryDataset,
                                                        self.parentProcessedDataset,
                                                        "RAW")

        cfgWrapper = self.workflow.payload.cfgInterface
        outputModule = cfgWrapper.getOutputModule(outputModuleName)

        outputModule["catalog"] = '%s-Catalog.xml' % outputModule['Name']
        outputModule["primaryDataset"] = self.primaryDataset
        outputModule["processedDataset"] = self.processedDataset
        outputModule["dataTier"] = dataTier
        outputModule["acquisitionEra"] = self.acquisitionEra
        outputModule["processingVersion"] = self.processingVersion

        outputDataset["LFNBase"] = getLFN(outputModule, self.run, Unmerged = True)
        outputDataset["MergedLFNBase"] = getLFN(outputModule, self.run)
        outputModule["LFNBase"] = outputDataset["LFNBase"]
        outputModule["MergedLFNBase"] = outputDataset["MergedLFNBase"]

        outputModule["fileName"] = "%s.root" % outputModule['Name']

        outputModule["logicalFileName"] = os.path.join(
            outputDataset["LFNBase"], "PromptReco.%s.root" % dataTier)

        return
    
    def makeWorkflow(self):
        """
        _makeWorkflow_

        Generate a workflow.  If the self.configFile parameter has been set
        this will attempt to load the config from file, otherwise it will
        create an empty process object which will get filled in by the runtime
        script.
        """
        self.timestamp = int(time.time())
        self.workflow = WorkflowSpec()
        self.workflowName = "PromptReco-Run%s-%s" % (self.run,
                                                     self.primaryDataset)

        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("data")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters["WorkflowType"] = "Processing"
        self.workflow.parameters["ProdRequestID"] = self.run
        self.workflow.parameters["RunNumber"] = self.run
        self.workflow.parameters["CMSSWVersion"] = self.cmssw["CMSSWVersion"] 
        self.workflow.parameters["ScramArch"] = self.cmssw["ScramArch"] 
        self.workflow.parameters["CMSPath"] = self.cmssw["CMSPath"]

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"
        self.cmsRunNode.application["Version"] = self.cmssw["CMSSWVersion"]
        self.cmsRunNode.application["Executable"] = "cmsRun"
        self.cmsRunNode.application["Project"] = "CMSSW"
        self.cmsRunNode.application["Architecture"] = self.cmssw["ScramArch"]

        inputDataset = self.cmsRunNode.addInputDataset(self.primaryDataset,
                                                       self.parentProcessedDataset)
        inputDataset["DataTier"] = "RAW"
        
        if self.configFile == None:
            self.loadProcessFromFramework()            
        else:
            self.loadProcessFromFile()

        WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1")
        WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")
        WorkflowTools.generateFilenames(self.workflow)

        return self.workflow

    def loadProcessFromFile(self):
        """
        _loadProcessFromFile_

        Load the config file into the workflow.
        """
        preExecScript = self.cmsRunNode.scriptControls["PreExe"]
        preExecScript.append("T0.PromptRecoInjector.RuntimePromptReco")
        
        cfgBaseName = os.path.basename(self.configFile).replace(".py", "")
        cfgDirName = os.path.dirname(self.configFile)
        modPath = imp.find_module(cfgBaseName, [cfgDirName])

        loader = CMSSWAPILoader(self.cmssw["ScramArch"],
                                self.cmssw["CMSSWVersion"],
                                self.cmssw["CMSPath"])
        
        try:
            loader.load()
        except Exception, ex:
            logging.error("Couldn't load CMSSW libraries: %s" % ex)
            return None
        
        try:
            modRef = imp.load_module(cfgBaseName, modPath[0],
                                     modPath[1], modPath[2])
        except Exception, ex:
            logging.error("Can't load config: %s" % ex)
            loader.unload()
            return None
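
The config-file loading above uses the Python 2 `imp` API. A minimal standalone sketch of the same pattern (helper name hypothetical), with the file handle returned by `find_module` closed explicitly, which the excerpt above does not do:

import imp
import os

def loadConfigModule(configFile):
    # Locate and import a CMSSW config file as a Python module.
    cfgBaseName = os.path.basename(configFile).replace(".py", "")
    cfgDirName = os.path.dirname(configFile)
    modFile, modPathname, modDescription = imp.find_module(cfgBaseName, [cfgDirName])
    try:
        return imp.load_module(cfgBaseName, modFile, modPathname, modDescription)
    finally:
        # find_module returns an open file handle that should be closed
        if modFile is not None:
            modFile.close()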
Code Example #27
0
from ProdCommon.FwkJobRep.MergeReports import updateReport
import StageOut.Utilities as StageOutUtils
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

if __name__ == '__main__':
    msg = "******RuntimeStageOutFailure Invoked*****"

    state = TaskState(os.getcwd())
    state.loadRunResDB()
    config = state.configurationDict()

    #  //
    # // find inputs by locating the task for which we are staging out
    #//  and loading its TaskState

    workflow = WorkflowSpec()
    workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])
    stageOutFor, override, controls = StageOutUtils.getStageOutConfig(
        workflow, state.taskName())

    inputTasks = stageOutFor
    for inputTask in inputTasks:

        inputState = getTaskState(inputTask)
        if inputState == None:
            msg = "Input State: %s Not found, skipping..." % inputTask
            continue

        inputReport = inputState.getJobReport()

        inputReport.status = "Failed"
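        # Hedged sketch of the typical follow-up, which this excerpt stops
        # short of: write the failed report back so downstream error handlers
        # can see it.  Both the report file name and the
        # updateReport(reportFile, report) call signature are assumptions
        # based on the import at the top of this example.
        toplevelReport = os.path.join(os.getcwd(), "FrameworkJobReport.xml")
        updateReport(toplevelReport, inputReport)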
Code Example #28
0
def createLogCollectorWorkflowSpec(wf):
    """
    _createLogCollectorWorkflowSpec_

    Create a generic LogArchive WorkflowSpec definition

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")
    workflow = WorkflowSpec()
    workflow.setWorkflowName("LogCollect-%s" % timestamp)
    workflow.setActivity("LogCollect")
    workflow.setRequestCategory("logcollect")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "LogCollect"

    logArchive = workflow.payload
    logArchive.name = "logCollect1"
    logArchive.type = "LogCollect"
    #TODO: remove this?
    #logArchive.workflow = wf
    logArchive.application["Project"] = ""
    logArchive.application["Version"] = ""
    logArchive.application["Architecture"] = ""
    logArchive.application[
        "Executable"] = "RuntimeLogCollector.py"  # binary name
    logArchive.configuration = ""
    logArchive.cfgInterface = None

    #set stageOut override
    #cfg = IMProvNode("config")
    #stageOut = IMProvNode("StageOutParameters")
    #cfg.addNode()
    #WorkflowTools.addStageOutNode(logArchive, "StageOut1")
    #WorkflowTools.addStageOutOverride(logArchive, stageOutParams['command'],
    #                                  stageOutParams['option'],
    #                                  stageOutParams['se-name'],
    #                                  stageOutParams['lfnPrefix'])

    return workflow
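
A minimal usage sketch for the factory above; the output path is illustrative, and save() is the same WorkflowSpec persistence call used elsewhere in these examples:

workflow = createLogCollectorWorkflowSpec(None)   # the 'wf' argument is currently unused
specFile = "/tmp/%s-Workflow.xml" % workflow.workflowName()   # illustrative location
workflow.save(specFile)
print "Wrote LogCollect workflow spec to %s" % specFile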
Code Example #30
0
File: DBSPlugin.py Project: giffels/PRODAGENT
    def __call__(self, collectPayload):
        """
        _operator(collectPayload)_

        Given the dataset and run in the payload, callout to DBS
        to find the files to be harvested

        """
        msg = "DBSPlugin invoked for %s" % str(collectPayload)
        logging.info(msg)


        site = self.args.get("Site", "srm.cern.ch")

        baseCache = os.path.join(self.args['ComponentDir'],
                                 "DBSPlugin")
        if not os.path.exists(baseCache):
            os.makedirs(baseCache)

        datasetCache = os.path.join(baseCache,
                                    collectPayload['PrimaryDataset'],
                                    collectPayload['ProcessedDataset'],
                                    collectPayload['DataTier'])

        if not os.path.exists(datasetCache):
            os.makedirs(datasetCache)

        workflowFile = os.path.join(
            datasetCache,
            "%s-%s-%s-DQMHarvest-Workflow.xml" % (
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'])
            )
        if not os.path.exists(workflowFile):
            msg = "No workflow found for dataset: %s\n " % (
                collectPayload.datasetPath(),)
            msg += "Looking up software version and generating workflow..."

            if self.args.get("OverrideGlobalTag", None) == None:
                globalTag = findGlobalTagForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
            else:
                globalTag = self.args['OverrideGlobalTag']


            if self.args.get("OverrideCMSSW", None) != None:
                cmsswVersion = self.args['OverrideCMSSW']
                msg = "Using Override for CMSSW Version %s" % (
                    self.args['OverrideCMSSW'],)
                logging.info(msg)
            else:
                cmsswVersion = findVersionForDataset(
                    self.dbsUrl,
                    collectPayload['PrimaryDataset'],
                    collectPayload['ProcessedDataset'],
                    collectPayload['DataTier'],
                    collectPayload['RunNumber'])
                msg = "Found CMSSW Version for dataset/run\n"
                msg += " Dataset %s Run %s\n" % (collectPayload.datasetPath(),
                                                 collectPayload['RunNumber'])
                msg += " CMSSW Version = %s\n " % cmsswVersion
                logging.info(msg)

            workflowSpec = createHarvestingWorkflow(
                collectPayload.datasetPath(),
                site,
                self.args['CmsPath'],
                self.args['ScramArch'],
                cmsswVersion,
                globalTag,
                configFile=self.args['ConfigFile'],
                DQMServer=self.args['DQMServer'],
                proxyLocation=self.args['proxyLocation'],
                DQMCopyToCERN=self.args['DQMCopyToCERN'],
                doStageOut=self.args['DoStageOut'])

            workflowSpec.save(workflowFile)
            msg = "Created Harvesting Workflow:\n %s" % workflowFile
            logging.info(msg)
            self.publishWorkflow(workflowFile, workflowSpec.workflowName())
        else:
            msg = "Loading existing workflow for dataset: %s\n " % (
                collectPayload.datasetPath(),)
            msg += " => %s\n" % workflowFile
            logging.info(msg)

            workflowSpec = WorkflowSpec()
            workflowSpec.load(workflowFile)




        job = {}
        jobSpec = workflowSpec.createJobSpec()
        jobName = "%s-%s-%s" % (
            workflowSpec.workflowName(),
            collectPayload['RunNumber'],
            time.strftime("%H-%M-%S-%d-%m-%y")
            )

        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Harvesting")
        jobSpec.parameters['RunNumber'] = collectPayload['RunNumber']
        jobSpec.addWhitelistSite(site)
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))



        jobSpec.payload.cfgInterface.inputFiles.extend(
            listFilesInRun(
            DBSReader(self.dbsUrl),
            collectPayload['PrimaryDataset'],
            collectPayload['ProcessedDataset'],
            collectPayload['DataTier'],
            collectPayload['RunNumber'])
            )

        specCacheDir =  os.path.join(
            datasetCache, str(int(collectPayload['RunNumber']) // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)

        jobSpec.save(jobSpecFile)


        job["JobSpecId"] = jobName
        job["JobSpecFile"] = jobSpecFile
        job['JobType'] = "Harvesting"
        job["WorkflowSpecId"] = workflowSpec.workflowName(),
        job["WorkflowPriority"] = 10
        job["Sites"] = [site]
        job["Run"] = collectPayload['RunNumber']
        job['WorkflowSpecFile'] = workflowFile



        msg = "Harvesting Job Created for\n"
        msg += " => Run:       %s\n" % collectPayload['RunNumber']
        msg += " => Primary:   %s\n" % collectPayload['PrimaryDataset']
        msg += " => Processed: %s\n" % collectPayload['ProcessedDataset']
        msg += " => Tier:      %s\n" % collectPayload['DataTier']
        msg += " => Workflow:  %s\n" % job['WorkflowSpecId']
        msg += " => Job:       %s\n" % job['JobSpecId']
        msg += " => Site:      %s\n" % job['Sites']
        logging.info(msg)

        return [job]
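
The concrete payload class handed to this plugin is not shown in the excerpt. A hedged sketch of the contract it relies on, using only the fields and the datasetPath() call seen above (dataset values illustrative):

class CollectPayloadStub(dict):
    # stand-in for the real collect payload: dict-style field access
    # plus a datasetPath() helper, exactly as used by __call__ above
    def datasetPath(self):
        return "/%s/%s/%s" % (self['PrimaryDataset'],
                              self['ProcessedDataset'],
                              self['DataTier'])

payload = CollectPayloadStub()
payload['PrimaryDataset'] = "MinimumBias"
payload['ProcessedDataset'] = "PromptReco-v1"
payload['DataTier'] = "RECO"
payload['RunNumber'] = 123456
# jobs = plugin(payload)   # would return a one-element list of job dictionaries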
Code Example #31
0
    def __init__(self, requestId, channel, label):
        self.requestId = requestId
        self.group = None
        self.label = label
        self.timestamp = int(time.time())
        self.channel = channel
        self.cmsswVersions = []
        self.configurations = []
        self.psetHashes = {}
        self.origCfgs = {}
        self.acquisitionEra = None
        self.processingString = None
        self.processingVersion = None
        self.conditions = None

        # turn on use of proper naming convention for datasets
        # should be made the default soon, lets deprecate all the old crap
        self.useProperNamingConventions = False

        self.options = {}
        self.options.setdefault('FakeHash', False)

        # Should we use another attribute for setting the output dataset
        # status in DBS?
        self.outputDatasetStatus = 'VALID'

        self.inputDataset = {}
        self.inputDataset['IsUsed'] = False
        self.inputDataset['DatasetName'] = None
        self.inputDataset['Primary'] = None
        self.inputDataset['Processed'] = None
        self.inputDataset['DataTier'] = None
        #  //
        # // Extra controls over input dataset if required
        #//
        self.inputDataset['SplitType'] = None
        self.inputDataset['SplitSize'] = None
        self.inputDataset['OnlySites'] = None
        self.inputDataset['OnlyBlocks'] = None
        self.inputDataset['OnlyClosedBlocks'] = True

        #  //
        # // Pileup Dataset controls
        #//
        self.pileupDatasets = []

        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflowName = "%s-%s-%s" % (label, channel, requestId)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("mc")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters['RequestLabel'] = self.label
        self.workflow.parameters['ProdRequestID'] = self.requestId

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"

        self.cmsRunNodes = [self.cmsRunNode]
        self.saveOutputFor = []
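
The class this constructor belongs to is not named in the excerpt. A brief, hedged sketch of typical instantiation (class name hypothetical), showing how the workflow name is composed from the three arguments:

maker = WorkflowMaker(requestId="100001", channel="ZJets", label="PreProd")  # hypothetical class name
print maker.workflow.workflowName()   # "PreProd-ZJets-100001", i.e. label-channel-requestId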
Code Example #32
0
File: RequestIterator.py Project: giffels/PRODAGENT
class RequestIterator:
    """
    _RequestIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.count = 0
        self.runIncrement = 1
        self.currentJob = None
        self.sitePref = None
        self.pileupDatasets = {}
        self.ownedJobSpecs = {}
        
        #  //
        # // Initially hard coded, should be extracted from Component Config
        #//
        self.eventsPerJob = 10 
        
        self.workflowSpec = WorkflowSpec()
        try:
            self.workflowSpec.load(workflowSpecFile)
        except Exception, ex:
            logging.error("ERROR Loading Workflow: %s: %s" % (workflowSpecFile, ex))
            return

        if self.workflowSpec.parameters.get("RunIncrement", None) != None:
            self.runIncrement = int(
                self.workflowSpec.parameters['RunIncrement']
                )

    
        self.generators = GeneratorMaker()
        self.workflowSpec.payload.operate(self.generators)
        
        
        
        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" %self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
        


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_

        Are we dealing with pileup? If so pull in the file list
        
        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_
                                                                                                              
        Are we dealing with pileup? If so pull in the site list
                                                                                                              
        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites
                                                                                                              


            
    def __call__(self):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.

        """
        newJobSpec = self.createJobSpec()
        self.count += self.runIncrement
        return newJobSpec


    def createJobSpec(self):
        """
        _createJobSpec_

        Load the WorkflowSpec object and generate a JobSpec from it

        """
        
        jobSpec = self.workflowSpec.createJobSpec()
        jobName = "%s-%s" % (
            self.workflowSpec.workflowName(),
            self.count,
            )
        self.currentJob = jobName
        jobSpec.setJobName(jobName)
        jobSpec.setJobType("Processing")
        jobSpec.parameters['RunNumber'] = self.count

        
        jobSpec.payload.operate(DefaultLFNMaker(jobSpec))
        jobSpec.payload.operate(self.generateJobConfig)
        jobSpec.payload.operate(self.generateCmsGenConfig)
        specCacheDir =  os.path.join(
            self.specCache, str(self.count // 1000).zfill(4))
        if not os.path.exists(specCacheDir):
            os.makedirs(specCacheDir)
        jobSpecFile = os.path.join(specCacheDir,
                                   "%s-JobSpec.xml" % jobName)
        self.ownedJobSpecs[jobName] = jobSpecFile

        
        #  //
        # // Add site pref if set
        #//
        if self.sitePref != None:
            # AF: Allow site pref to be a comma-separated list of sites,
            # each one added to the whitelist:
            for siteWhite in self.sitePref.split(","):
                jobSpec.addWhitelistSite(siteWhite)
            
        jobSpec.save(jobSpecFile)        
        return jobSpecFile
        
        
    def generateJobConfig(self, jobSpecNode):
        """
        _generateJobConfig_
        
        Operator to act on a JobSpecNode tree to convert the template
        config file into a JobSpecific Config File
                
        """
        if jobSpecNode.name not in self.generators.keys():
            return
        generator = self.generators[jobSpecNode.name]

        useOutputMaxEv = False
        if jobSpecNode.cfgInterface != None:
            outMaxEv = jobSpecNode.cfgInterface.maxEvents['output']
            if outMaxEv != None:
                useOutputMaxEv = True

        if useOutputMaxEv:
            jobCfg = generator(self.currentJob,
                               maxEventsWritten = self.eventsPerJob,
                               firstRun = self.count)
        else:
            jobCfg = generator(self.currentJob,
                               maxEvents = self.eventsPerJob,
                               firstRun = self.count)
        
        #  //
        # // Is there pileup for this node?
        #//
        if self.pileupDatasets.has_key(jobSpecNode.name):
            puDataset = self.pileupDatasets[jobSpecNode.name]
            logging.debug("Node: %s has a pileup dataset: %s" % (
                jobSpecNode.name,  puDataset.dataset,
                ))
            fileList = puDataset.getPileupFiles()
            jobCfg.pileupFiles = fileList

            
            
        
        jobSpecNode.cfgInterface = jobCfg
        return


    def generateCmsGenConfig(self, jobSpecNode):
        """
        _generateCmsGenConfig_

        Process CmsGen type nodes to insert maxEvents and run numbers
        for cmsGen jobs

        """
        if jobSpecNode.type != "CmsGen":
            return

        jobSpecNode.applicationControls['firstRun'] = self.count
        jobSpecNode.applicationControls['maxEvents'] = self.eventsPerJob
        jobSpecNode.applicationControls['randomSeed'] = randomSeed()
        jobSpecNode.applicationControls['fileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        jobSpecNode.applicationControls['logicalFileName'] = "%s-%s.root" % (
            self.currentJob, jobSpecNode.name)
        return
        

    def removeSpec(self, jobSpecId):
        """
        _removeSpec_

        Remove a Spec file when it has been successfully injected

        """
        if jobSpecId not in self.ownedJobSpecs.keys():
            return

        logging.info("Removing JobSpec For: %s" % jobSpecId)
        filename = self.ownedJobSpecs[jobSpecId]
        if os.path.exists(filename):
            os.remove(filename)
            del self.ownedJobSpecs[jobSpecId]
        return

        


    def save(self, directory):
        """
        _save_

        Persist this objects state into an XML file and save
        it in the directory provided

        """
        doc = IMProvDoc("RequestIterator")
        node = IMProvNode(self.workflowSpec.workflowName())
        doc.addNode(node)

        node.addNode(IMProvNode("Run", None, Value = str(self.count)))
        node.addNode(
            IMProvNode("EventsPerJob", None, Value = str(self.eventsPerJob))
            )
        node.addNode(IMProvNode("SitePref", None, Value = str(self.sitePref)))

        pu = IMProvNode("Pileup")
        node.addNode(pu)
        for key, value in self.pileupDatasets.items():
            puNode = value.save()
            puNode.attrs['PayloadNode'] = key
            pu.addNode(puNode)

        specs = IMProvNode("JobSpecs")
        node.addNode(specs)
        for key, val in self.ownedJobSpecs.items():
            specs.addNode(IMProvNode("JobSpec", val, ID = key))
            
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )
        handle = open(fname, 'w')
        handle.write(doc.makeDOMDocument().toprettyxml())
        handle.close()
        return


    def load(self, directory):
        """
        _load_

        Load this instance given the workflow and directory containing
        the persistency file

        """
        fname = os.path.join(
            directory,
            "%s-Persist.xml" % self.workflowSpec.workflowName()
            )

        try:
            node = loadIMProvFile(fname)
        except Exception, ex:
            msg = "ERROR: Corrupted Persistency File:\n"
            msg += "  => %s\n" % fname
            msg += "Cannot be read:\n  => %s\n" % str(ex)
            logging.error(msg)
            return
        

        qbase = "/RequestIterator/%s" % self.workflowSpec.workflowName()

        runQ = IMProvQuery("%s/Run[attribute(\"Value\")]" % qbase)
        eventQ = IMProvQuery("%s/EventsPerJob[attribute(\"Value\")]" % qbase)
        siteQ = IMProvQuery("%s/SitePref[attribute(\"Value\")]" % qbase)

        runVal = int(runQ(node)[-1])
        eventVal = int(eventQ(node)[-1])
        siteVal = str(siteQ(node)[-1])
        if siteVal.lower() == "none":
            siteVal = None

        self.count = runVal
        self.eventsPerJob = eventVal
        self.sitePref = siteVal
        
        puQ = IMProvQuery("%s/Pileup/*" % qbase)
        puNodes = puQ(node)
        for puNode in puNodes:
            payloadNode = str(puNode.attrs.get("PayloadNode"))
            puDataset = PileupDataset("dummy", 1)
            puDataset.load(puNode)
            self.pileupDatasets[payloadNode] = puDataset

        specQ = IMProvQuery("%s/JobSpecs/*" % qbase)
        specNodes = specQ(node)
        for specNode in specNodes:
            specId = str(specNode.attrs['ID'])
            specFile = str(specNode.chardata).strip()
            self.ownedJobSpecs[specId] = specFile

        return
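
Putting the pieces above together, a minimal driver sketch for RequestIterator (paths illustrative):

workingDir = "/tmp/prodagent-work"                          # illustrative
iterator = RequestIterator("/tmp/MyRequest-Workflow.xml", workingDir)
iterator.loadPileupDatasets()

# each call emits one concrete JobSpec file and advances the run counter
for i in range(3):
    jobSpecFile = iterator()
    print "Created %s" % jobSpecFile

# persist the iterator state so run numbering survives a component restart
iterator.save(workingDir)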
Code Example #33
0
File: MergeTools.py Project: jlexternal/WPrime13TeV
def createMergeJobWorkflow(procSpec,
                           isFastMerge=True,
                           doCleanUp=True,
                           littleE=False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job
    workflows that can be used to generate actual merge jobs 
    (as opposed to creating datasets like createMergeDatasetWorkflow)

    returns a dictionary of (input, IE MergeSensor watched) dataset name
    to workflow spec instances

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        #  //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox != None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        #  //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        #  //
        # // Output Dataset
        #//

        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'], dataset['ProcessedDataset'],
            outputModuleName)

        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']

        outputDataset["ApplicationName"] = \
                    cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = \
                    cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = \
                    cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
                      procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset

        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp == True:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        #  //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        results[inputDataset] = newWF

    return results
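
A short usage sketch: load a processing workflow, derive the per-dataset merge workflows, and persist each one (file locations illustrative):

from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

procSpec = WorkflowSpec()
procSpec.load("/tmp/MyProcessing-Workflow.xml")    # illustrative path

mergeWorkflows = createMergeJobWorkflow(procSpec, doCleanUp=True)
for inputDatasetName, mergeSpec in mergeWorkflows.items():
    target = "/tmp/Merge%s-Workflow.xml" % inputDatasetName.replace("/", "_")
    mergeSpec.save(target)
    print "Merge workflow for %s written to %s" % (inputDatasetName, target)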
Code Example #35
0
File: DatasetIterator.py Project: giffels/PRODAGENT
class DatasetIterator:
    """
    _DatasetIterator_

    Working from a Generic Workflow template, generate
    concrete jobs from it, keeping in-memory history

    """
    def __init__(self, workflowSpecFile, workingDir):
        self.workflow = workflowSpecFile
        self.workingDir = workingDir
        self.currentJob = None
        self.workflowSpec = WorkflowSpec()
        self.workflowSpec.load(workflowSpecFile)
        self.currentJobDef = None
        self.count = 0
        self.onlyClosedBlocks = False
        if  self.workflowSpec.parameters.has_key("OnlyClosedBlocks"):
            onlyClosed =  str(
                self.workflowSpec.parameters["OnlyClosedBlocks"]).lower()
            if onlyClosed == "true":
                self.onlyClosedBlocks = True
        self.ownedJobSpecs = {}
        self.allowedBlocks = []
        self.allowedSites = []
        self.dbsUrl = getLocalDBSURL()
        self.splitType = \
                self.workflowSpec.parameters.get("SplitType", "file").lower()
        self.splitSize = int(self.workflowSpec.parameters.get("SplitSize", 1))

        self.generators = GeneratorMaker()
        self.generators(self.workflowSpec.payload)

        self.pileupDatasets = {}
        #  //
        # // Does the workflow contain a block restriction??
        #//
        blockRestriction = \
             self.workflowSpec.parameters.get("OnlyBlocks", None)
        if blockRestriction != None:
            #  //
            # // restriction on blocks present, populate allowedBlocks list
            #//
            msg = "Block restriction provided in Workflow Spec:\n"
            msg += "%s\n" % blockRestriction
            blockList = blockRestriction.split(",")
            for block in blockList:
                if len(block.strip() ) > 0:
                    self.allowedBlocks.append(block.strip())

        #  //
        # // Does the workflow contain a site restriction??
        #//
        siteRestriction = \
           self.workflowSpec.parameters.get("OnlySites", None)          
        if siteRestriction != None:
            #  //
            # // restriction on sites present, populate allowedSites list
            #//
            msg = "Site restriction provided in Workflow Spec:\n"
            msg += "%s\n" % siteRestriction
            siteList = siteRestriction.split(",")
            for site in siteList:
                if len(site.strip() ) > 0:
                    self.allowedSites.append(site.strip())

        #  //
        # // Is the DBSURL contact information provided??
        #//

        value = self.workflowSpec.parameters.get("DBSURL", None)
        if value != None:
            self.dbsUrl = value

        if self.dbsUrl == None:
            msg = "Error: No DBSURL available for dataset:\n"
            msg += "Cant get local DBSURL and one not provided with workflow"
            raise RuntimeError, msg
            
        #  //
        # // Cache Area for JobSpecs
        #//
        self.specCache = os.path.join(
            self.workingDir,
            "%s-Cache" %self.workflowSpec.workflowName())
        if not os.path.exists(self.specCache):
            os.makedirs(self.specCache)
        
        
    def __call__(self, jobDef):
        """
        _operator()_

        When called generate a new concrete job payload from the
        generic workflow and return it.
        The JobDef should be a JobDefinition with the input details
        including LFNs and event ranges etc.

        """
        newJobSpec = self.createJobSpec(jobDef)
        self.count += 1
        return newJobSpec


    def loadPileupDatasets(self):
        """
        _loadPileupDatasets_
        
        Are we dealing with pileup? If so pull in the file list
        
        """
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            self.pileupDatasets = createPileupDatasets(self.workflowSpec)
        return

    def loadPileupSites(self):
        """
        _loadPileupSites_
                                                                                                              
        Are we dealing with pileup? If so pull in the site list
                                                                                                              
        """
        sites = []
        puDatasets = self.workflowSpec.pileupDatasets()
        if len(puDatasets) > 0:
            logging.info("Found %s Pileup Datasets for Workflow: %s" % (
                len(puDatasets), self.workflowSpec.workflowName(),
                ))
            sites = getPileupSites(self.workflowSpec)
        return sites
                               

    def inputDataset(self):
        """
        _inputDataset_

        Extract the input Dataset from this workflow

        """
        topNode = self.workflowSpec.payload
        try:
            inputDataset = topNode._InputDatasets[-1]
        except StandardError, ex:
            msg = "Error extracting input dataset from Workflow:\n"
            msg += str(ex)
            logging.error(msg)
            return None

        return inputDataset.name()
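
The block and site restrictions parsed in __init__ are plain comma-separated workflow parameters. A small sketch of how a workflow spec would be annotated before being handed to this iterator (values illustrative):

from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec

spec = WorkflowSpec()
spec.load("/tmp/MyProcessing-Workflow.xml")          # illustrative path
spec.parameters['SplitType'] = "Event"               # lower-cased to "event" by the iterator
spec.parameters['SplitSize'] = "500"
spec.parameters['OnlyClosedBlocks'] = "true"
spec.parameters['OnlySites'] = "T1_US_FNAL,T2_CH_CERN"
spec.parameters['OnlyBlocks'] = "/Primary/Processed/Tier#block-1,/Primary/Processed/Tier#block-2"
spec.save("/tmp/MyProcessing-Workflow.xml")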
Code Example #36
0
File: removeWorkflow.py Project: giffels/PRODAGENT
"""

import sys, os

from ProdAgentDB.Config import defaultConfig as dbConfig
from ProdCommon.Database import Session
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdAgentCore.Configuration import loadProdAgentConfiguration

from MergeSensor.MergeSensorDB import MergeSensorDB
from JobQueue.JobQueueDB import JobQueueDB
import ProdAgent.WorkflowEntities.Aux as WEAux
import ProdAgent.WorkflowEntities.Workflow as WEWorkflow

workflow = sys.argv[1]
workflowSpec = WorkflowSpec()
workflowSpec.load(workflow)

#  //
# // Clean out job cache
#//
config = loadProdAgentConfiguration()
compCfg = config.getConfig("JobCreator")
creatorCache = os.path.expandvars(compCfg['ComponentDir'])

workflowCache = os.path.join(creatorCache, workflowSpec.workflowName())
if os.path.exists(workflowCache):
    os.system("/bin/rm -rf %s" % workflowCache)


Code Example #37
0
File: fixDB.py Project: giffels/PRODAGENT
logFormatter = logging.Formatter("%(asctime)s:%(message)s")
logHandler.setFormatter(logFormatter)
logging.getLogger().addHandler(logHandler)
logging.getLogger().setLevel(logging.INFO)
Dataset.setLogging(logging)

database = MergeSensorDB()
Dataset.setDatabase(database)

workflowFile = sys.argv[1]

print "Updating DB for workflow: ", workflowFile

# read the WorkflowSpecFile
try:
    wfile = WorkflowSpec()
    wfile.load(workflowFile)

# wrong dataset file
except Exception, msg:
    print "Error loading workflow specifications from %s: %s" \
          % (workflowFile, msg)
    sys.exit(1)

# get output modules
try:
    outputDatasetsList = wfile.outputDatasets()

    outputModules = [outDS['OutputModuleName'] \
                     for outDS in outputDatasetsList]
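
outputDatasets() returns one dictionary per output module. A short sketch printing the dataset path for each entry, reusing outputDatasetsList from the excerpt and only key names that appear in these examples:

for outDS in outputDatasetsList:
    datasetPath = "/%s/%s/%s" % (outDS['PrimaryDataset'],
                                 outDS['ProcessedDataset'],
                                 outDS['DataTier'])
    print "%s -> %s" % (outDS['OutputModuleName'], datasetPath)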
Code Example #38
0
File: RuntimeLogArch.py Project: giffels/PRODAGENT
    def __init__(self):
        self.state = TaskState(os.getcwd())
        self.state.loadRunResDB()
        self.state.loadJobSpecNode()

        #  //
        # // check for store fail settings
        #//
        self.workflow = WorkflowSpec()
        self.workflow.load(os.environ['PRODAGENT_WORKFLOW_SPEC'])
        self.doStoreFail = self.workflow.parameters.get("UseStoreFail", False)
        if str(self.doStoreFail).lower() == "true":
            self.doStoreFail = True

        self.config = self.state.configurationDict()

        self.inputTasks = self.config.get("InputTasks", [])
#        #TODO: not really sure this is correct, i would think i should
#        # take report from stageOut but that is missing if cmsRun fails
#        # what if cmsRun one is missing - do i want to generate an empty one?
#        self.inputReport = getTaskState(self.inputTasks[0]).getJobReport()

        # iterate over input tasks (in reverse order)
        # find first one with a fjr
        self.inputTask, self.inputReport = None, None
        for taskName in self.inputTasks[::-1]:
            task = getTaskState(taskName)
            report = task.getJobReport()
            if report is None:
                continue
            self.inputTask = task
            self.inputReport = report
            break

        # if got no valid fjr from previous tasks -
        # something must have gone wrong earlier - make our own
        # may need more things set here to make reports mergeable
        if self.inputReport is None:
            self.inputTask = self.state
            self.inputReport = FwkJobReport()

        self.regexps = self.config.get("LogMatchRegexp", [])

        self.doStageOut = True
        doingStageOut = self.config.get("DoStageOut", [])
        if len(doingStageOut) > 0:
            control = doingStageOut[-1]
            if control == "False":
                self.doStageOut = False


        self.workflowSpecId = self.config['WorkflowSpecID'][0]
        self.jobSpecId = self.state.jobSpecNode.jobName


        self.compRegexps = []
        for regexp in self.regexps:
            self.compRegexps.append(re.compile(regexp))


        # TODO: These should be pulled in from the workflow now not the config thing
        self.override = False
        soParams = self.config.get('StageOutParameters', {})
        self.override = soParams.has_key("Override")
        self.overrideParams = {}

        if self.override:
            overrideConf = self.config['StageOutParameters']['Override']
            self.overrideParams = {
                "command" : None,
                "option" : None,
                "se-name" : None,
                "lfn-prefix" : None,
                }

            try:
                self.overrideParams['command'] = overrideConf['command'][0]
                self.overrideParams['se-name'] = overrideConf['se-name'][0]
                self.overrideParams['lfn-prefix'] = overrideConf['lfn-prefix'][0]
            except StandardError, ex:
                msg = "Unable to extract Override parameters from config:\n"
                msg += str(self.config['StageOutParameters'])
                raise StageOutInitError(msg)

            if overrideConf.has_key('option'):
                if len(overrideConf['option']) > 0:
                    self.overrideParams['option'] = overrideConf['option'][-1]
                else:
                    self.overrideParams['option'] = ""
Code Example #39
0
#!/usr/bin/env python

import os
import pickle
from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWAPILoader import CMSSWAPILoader
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.MCPayloads.DatasetTools import getOutputDatasetsWithPSet

specfile = "/uscms/home/gutsche/CSA08-JetET110-CSA08_S43_S43_rereco_may19_PIC_v1-Workflow.xml"

rawCfgFile = "%s.raw.cfg" % os.path.basename(specfile)
origCfgFile = "%s.orig.cfg" % os.path.basename(specfile)
dbsCfgFile = "%s.dbs.cfg" % os.path.basename(specfile)

spec = WorkflowSpec()
spec.load(specfile)

rawCfg = spec.payload.cfgInterface.rawCfg
originalCfg = spec.payload.cfgInterface.originalCfg

dbsDatasets = getOutputDatasetsWithPSet(spec.payload)

handle = open(dbsCfgFile, 'w')
handle.write(dbsDatasets[0]['PSetContent'])
handle.close()

handle = open(origCfgFile, 'w')
handle.write(originalCfg)
handle.close()

loader = CMSSWAPILoader(os.environ['SCRAM_ARCH'],