Code example #1
def createCleanupWorkflowSpec():
    """
    _createCleanupWorkflowSpec_

    Create a generic cleanup WorkflowSpec definition
    that can be used to generate a sandbox for cleanup jobs

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")
    workflow = WorkflowSpec()
    workflow.setWorkflowName("CleanUp-%s" % timestamp)
    workflow.setActivity("CleanUp")
    workflow.setRequestCategory("mc-cleanup")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "CleanUp"

    cleanUp = workflow.payload
    cleanUp.name = "cleanUp1"
    cleanUp.type = "CleanUp"

    cleanUp.application["Project"] = ""
    cleanUp.application["Version"] = ""
    cleanUp.application["Architecture"] = ""
    cleanUp.application["Executable"] = "RuntimeCleanUp.py"  # binary name
    cleanUp.configuration = ""
    cleanUp.cfgInterface = None

    return workflow
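A minimal usage sketch: workflowName() appears in the other examples below, while save() is assumed to be the counterpart of the load() used in code example #7, and the output filename is hypothetical.

workflow = createCleanupWorkflowSpec()
print(workflow.workflowName())         # e.g. "CleanUp-Mon-Jun--1-..."
workflow.save("CleanUp-Workflow.xml")  # assumption: save() mirrors WorkflowSpec.load()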
Code example #2
File: RequestSpec.py  Project: jlexternal/WPrime13TeV
    def load(self, improvNode):
        """
        _load_

        Extract information for this object from the improv instance provided
        """
        wfQuery = IMProvQuery("/RequestSpec/WorkflowSpec")
        wfnode = wfQuery(improvNode)[0]
        wfspec = WorkflowSpec()
        wfspec.loadFromNode(wfnode)
        self.workflow = wfspec

        policyQuery = IMProvQuery("/RequestSpec/Policies/*")
        detailQuery = IMProvQuery("/RequestSpec/RequestDetails/*")
        preferredPAQuery = IMProvQuery("/RequestSpec/PreferredPA")

        policies = policyQuery(improvNode)
        details = detailQuery(improvNode)
        preferredPAs = preferredPAQuery(improvNode)

        for policy in policies:
            self.policies[str(policy.name)] = str(policy.chardata)

        for detail in details:
            self.requestDetails[str(detail.name)] = str(detail.chardata)

        for preferredPA in preferredPAs:
            self.preferredPAs[str(preferredPA.attrs['id'])] = \
                str(preferredPA.attrs['priority'])

        return
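A hedged sketch of driving this loader: it needs an IMProv node tree, so the loadIMProvFile helper from the IMProv package is assumed here, along with a no-argument RequestSpec constructor; the input file is hypothetical.

from IMProv.IMProvLoader import loadIMProvFile  # assumed IMProv XML loader

improvDoc = loadIMProvFile("RequestSpec.xml")   # hypothetical request XML file
spec = RequestSpec()                            # assumption: no required constructor args
spec.load(improvDoc)
print(spec.workflow.workflowName())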
Code example #3
def createProductionWorkflow(prodName,
                             cmsswVersion,
                             cfgFile=None,
                             category="mc",
                             **args):
    """
    _createProductionWorkflow_

    Create a Production style workflow, i.e. generation of new events

    """

    timestamp = int(time.time())
    if args.get("PyCfg", None) == None:
        if cfgFile == None:
            msg = "Error: No Cfg File or python cfg file provided to createProductionWorkflow"
            raise RuntimeError, msg
        pycfgFile = createPythonConfig(cfgFile)
        pycfgFileContent = file(pycfgFile).read()
    else:
        pycfgFileContent = args['PyCfg']

    if args.get("PSetHash", None) == None:
        realPSetHash = createPSetHash(cfgFile)
    else:
        realPSetHash = args['PSetHash']

    #  //
    # // Create a new WorkflowSpec and set its name
    #//
    spec = WorkflowSpec()
    workflowname = "%s__%s-%s-%s-%s" % (
        prodName, cmsswVersion, args.get("processingLabel", "Test07"),
        args.get("physicsGroup", "NoPhysicsGroup"), timestamp)
    spec.setWorkflowName(workflowname)
    spec.setRequestCategory(category)
    spec.setRequestTimestamp(timestamp)

    cmsRun = spec.payload
    populateCMSRunNode(cmsRun,
                       "cmsRun1",
                       cmsswVersion,
                       pycfgFileContent,
                       realPSetHash,
                       timestamp,
                       prodName,
                       physicsGroup=args.get("physicsGroup", "NoPhysicsGroup"),
                       processingLabel=args.get("processingLabel", "Test07"),
                       fakeHash=args.get("FakeHash", False))

    addStageOutNode(cmsRun, "stageOut1")
    generateFilenames(spec)
    return spec
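For illustration, a call might look like the following; every argument value and the output path are hypothetical, and save() is assumed to mirror the load() used in code example #7.

spec = createProductionWorkflow("MinBiasProduction",
                                "CMSSW_2_0_8",
                                cfgFile="minbias.cfg",
                                category="mc",
                                physicsGroup="Individual",
                                processingLabel="Test07")
spec.save("%s-Workflow.xml" % spec.workflowName())  # assumption: save() mirrors load()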
Code example #4
def createJobSpec(jobSpecId,
                  workflowSpecFile,
                  filename,
                  runNumber,
                  eventCount,
                  firstEvent=None,
                  saveString=False,
                  loadString=True):

    #  //
    # // Load workflow
    #//
    workflowSpec = WorkflowSpec()
    if loadString:
        workflowSpec.loadString(workflowSpecFile)
    else:
        workflowSpec.load(workflowSpecFile)

    #  //
    # // Create JobSpec
    #//
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), runNumber)

    #jobSpec.setJobName(jobName)
    jobSpec.setJobName(jobSpecId)
    jobSpec.setJobType("Processing")
    jobSpec.parameters['RunNumber'] = runNumber
    jobSpec.parameters['EventCount'] = eventCount

    jobSpec.payload.operate(DefaultLFNMaker(jobSpec))

    if firstEvent is not None:
        jobSpec.parameters['FirstEvent'] = firstEvent

    cfgMaker = ConfigGenerator(jobSpec)
    jobSpec.payload.operate(cfgMaker)

    if saveString:
        return jobSpec.saveString()
    jobSpec.save(filename)
    return
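A usage sketch with loadString=False so the workflow spec is read from a file rather than passed as a string; every path and identifier below is hypothetical.

createJobSpec(jobSpecId="ProcJob-0001",
              workflowSpecFile="Proc-Workflow.xml",
              filename="ProcJob-0001-JobSpec.xml",
              runNumber=1,
              eventCount=1000,
              firstEvent=0,
              loadString=False)  # load the spec from disk instead of a string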
Code example #5
def createLogCollectorWorkflowSpec(wf):
    """
    _createLogCollectorWorkflowSpec_

    Create a generic LogArchive WorkflowSpec definition

    """
    timestamp = str(time.asctime(time.localtime(time.time())))
    timestamp = timestamp.replace(" ", "-")
    timestamp = timestamp.replace(":", "_")
    workflow = WorkflowSpec()
    workflow.setWorkflowName("LogCollect-%s" % timestamp)
    workflow.setActivity("LogCollect")
    workflow.setRequestCategory("logcollect")
    workflow.setRequestTimestamp(timestamp)
    workflow.parameters['WorkflowType'] = "LogCollect"

    logArchive = workflow.payload
    logArchive.name = "logCollect1"
    logArchive.type = "LogCollect"
    #TODO: remove this?
    #logArchive.workflow = wf
    logArchive.application["Project"] = ""
    logArchive.application["Version"] = ""
    logArchive.application["Architecture"] = ""
    logArchive.application["Executable"] = "RuntimeLogCollector.py"  # binary name
    logArchive.configuration = ""
    logArchive.cfgInterface = None

    #set stageOut override
    #cfg = IMProvNode("config")
    #stageOut = IMProvNode("StageOutParameters")
    #cfg.addNode()
    #WorkflowTools.addStageOutNode(logArchive, "StageOut1")
    #WorkflowTools.addStageOutOverride(logArchive, stageOutParams['command'],
    #                                  stageOutParams['option'],
    #                                  stageOutParams['se-name'],
    #                                  stageOutParams['lfnPrefix'])

    return workflow
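Note that the wf argument is only referenced from the commented-out assignment above, so any value can be passed for now; a minimal sketch:

workflow = createLogCollectorWorkflowSpec(None)  # wf is unused while the assignment stays commented out
print(workflow.parameters['WorkflowType'])       # prints "LogCollect"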
Code example #6
    def __init__(self, requestId, channel, label):
        self.requestId = requestId
        self.group = None
        self.label = label
        self.timestamp = int(time.time())
        self.channel = channel
        self.cmsswVersions = []
        self.configurations = []
        self.psetHashes = {}
        self.origCfgs = {}
        self.acquisitionEra = None
        self.processingString = None
        self.processingVersion = None
        self.conditions = None

        # turn on use of proper naming convention for datasets
        # should be made the default soon, let's deprecate all the old crap
        self.useProperNamingConventions = False

        self.options = {}
        self.options.setdefault('FakeHash', False)

        # Should we use another attribute for setting the output dataset
        # status in DBS?
        self.outputDatasetStatus = 'VALID'

        self.inputDataset = {}
        self.inputDataset['IsUsed'] = False
        self.inputDataset['DatasetName'] = None
        self.inputDataset['Primary'] = None
        self.inputDataset['Processed'] = None
        self.inputDataset['DataTier'] = None
        #  //
        # // Extra controls over input dataset if required
        #//
        self.inputDataset['SplitType'] = None
        self.inputDataset['SplitSize'] = None
        self.inputDataset['OnlySites'] = None
        self.inputDataset['OnlyBlocks'] = None
        self.inputDataset['OnlyClosedBlocks'] = True

        #  //
        # // Pileup Dataset controls
        #//
        self.pileupDatasets = []

        #  //
        # // Initialise basic workflow
        #//
        self.workflow = WorkflowSpec()
        self.workflowName = "%s-%s-%s" % (label, channel, requestId)
        self.workflow.setWorkflowName(self.workflowName)
        self.workflow.setRequestCategory("mc")
        self.workflow.setRequestTimestamp(self.timestamp)
        self.workflow.parameters['RequestLabel'] = self.label
        self.workflow.parameters['ProdRequestID'] = self.requestId

        self.cmsRunNode = self.workflow.payload
        self.cmsRunNode.name = "cmsRun1"
        self.cmsRunNode.type = "CMSSW"

        self.cmsRunNodes = [self.cmsRunNode]
        self.saveOutputFor = []
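This __init__ matches the shape of ProdCommon's WorkflowMaker, but the listing omits the class name, so the name below is an assumption; the argument values are hypothetical.

maker = WorkflowMaker(requestId="100001", channel="MinBias", label="Test")  # class name assumed
print(maker.workflow.workflowName())  # "Test-MinBias-100001" per the "%s-%s-%s" pattern above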
Code example #7
#!/usr/bin/env python

import os
import pickle
from ProdCommon.CMSConfigTools.ConfigAPI.CMSSWAPILoader import CMSSWAPILoader
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
from ProdCommon.MCPayloads.DatasetTools import getOutputDatasetsWithPSet

specfile = "/uscms/home/gutsche/CSA08-JetET110-CSA08_S43_S43_rereco_may19_PIC_v1-Workflow.xml"

rawCfgFile = "%s.raw.cfg" % os.path.basename(specfile)
origCfgFile = "%s.orig.cfg" % os.path.basename(specfile)
dbsCfgFile = "%s.dbs.cfg" % os.path.basename(specfile)

spec = WorkflowSpec()
spec.load(specfile)

rawCfg = spec.payload.cfgInterface.rawCfg
originalCfg = spec.payload.cfgInterface.originalCfg

dbsDatasets = getOutputDatasetsWithPSet(spec.payload)

handle = open(dbsCfgFile, 'w')
handle.write(dbsDatasets[0]['PSetContent'])
handle.close()

handle = open(origCfgFile, 'w')
handle.write(originalCfg)
handle.close()

loader = CMSSWAPILoader(os.environ['SCRAM_ARCH'],
Code example #8
File: MergeTools.py  Project: jlexternal/WPrime13TeV
def createMergeJobWorkflow(procSpec,
                           isFastMerge=True,
                           doCleanUp=True,
                           littleE=False):
    """
    _createMergeJobWorkflow_

    Given a Processing Workflow, generate a set of Merge Job
    workflows that can be used to generate actual merge jobs 
    (as opposed to creating datasets like createMergeDatasetWorkflow)

    returns a dictionary of (input, i.e. MergeSensor watched) dataset name
    to workflow spec instances

    """
    mergeDatasetWF = createMergeDatasetWorkflow(procSpec, isFastMerge)
    mergeDatasets = mergeDatasetWF.outputDatasets()

    results = {}

    procSpecName = procSpec.workflowName()

    for dataset in mergeDatasets:
        inputDataset = dataset['ParentDataset']

        newWF = WorkflowSpec()
        newWF.parameters.update(procSpec.parameters)
        newWF.setWorkflowName(procSpecName)
        newWF.parameters['WorkflowType'] = "Merge"

        cmsRunNode = newWF.payload
        cmsRunNode.name = "cmsRun1"
        cmsRunNode.type = "CMSSW"
        cmsRunNode.application["Project"] = "CMSSW"
        cmsRunNode.application["Version"] = dataset['ApplicationVersion']
        cmsRunNode.application["Architecture"] = "slc3_ia32_gcc323"

        #  //
        # // Hack to forward UserSandbox to Merge Jobs
        #//
        userSandbox = dataset.get("UserSandbox", None)
        if userSandbox is not None:
            cmsRunNode.userSandbox = userSandbox

        #if isFastMerge == True:
        #    if littleE:
        #        cmsRunNode.application["Executable"] = "edmFastMerge"
        #    else:
        #        cmsRunNode.application["Executable"] = _FastMergeBinary
        #    outputModuleName = "EdmFastMerge"
        #else:
        cmsRunNode.application["Executable"] = "cmsRun"
        outputModuleName = "Merged"

        #  //
        # // Input Dataset
        #//
        datasetBits = DatasetConventions.parseDatasetPath(inputDataset)
        inDataset = cmsRunNode.addInputDataset(datasetBits['Primary'],
                                               datasetBits['Processed'])
        inDataset["DataTier"] = datasetBits['DataTier']

        #  //
        # // Output Dataset
        #//

        outputDataset = cmsRunNode.addOutputDataset(
            dataset['PrimaryDataset'], dataset['ProcessedDataset'],
            outputModuleName)

        outputDataset["DataTier"] = dataset['DataTier']
        outputDataset["PSetHash"] = dataset['PSetHash']

        outputDataset["ApplicationName"] = \
                    cmsRunNode.application["Executable"]
        outputDataset["ApplicationProject"] = \
                    cmsRunNode.application["Project"]
        outputDataset["ApplicationVersion"] = \
                    cmsRunNode.application["Version"]
        outputDataset["ApplicationFamily"] = outputModuleName
        outputDataset["PhysicsGroup"] = \
                      procSpec.parameters.get('PhysicsGroup', None)
        outputDataset['ParentDataset'] = inputDataset

        #  //
        # // Add Stage Out node
        #//
        WorkflowTools.addStageOutNode(cmsRunNode, "stageOut1")
        if doCleanUp:
            WorkflowTools.addCleanUpNode(cmsRunNode, "cleanUp1")

        #  //
        # // Add log archive node
        #//
        WorkflowTools.addLogArchNode(cmsRunNode, "logArchive")

        WorkflowTools.generateFilenames(newWF)

        results[inputDataset] = newWF

    return results
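A sketch of consuming the result, assuming the processing workflow spec was previously saved to a (hypothetical) file:

procSpec = WorkflowSpec()
procSpec.load("Proc-Workflow.xml")  # hypothetical processing workflow file
mergeWorkflows = createMergeJobWorkflow(procSpec, isFastMerge=False)
for datasetName, mergeSpec in mergeWorkflows.items():
    print("merge workflow created for %s" % datasetName)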