def save(self):
    """
    _save_

    Pickle the alert data object into a file under the AlertHandler
    working directory.

    """
    output = None
    try:
        try:
            config = os.environ.get("PRODAGENT_CONFIG", None)
            if config == None:
                msg = "No ProdAgent Config file provided\n"
                raise RuntimeError, msg

            cfgObject = ProdAgentConfiguration()
            cfgObject.loadFromFile(config)
            alertHandlerConfig = cfgObject.get("AlertHandler")
            workingDir = alertHandlerConfig["ComponentDir"]
            dir = os.path.join(os.path.expandvars(workingDir), "Alerts")
            if not os.path.exists(dir):
                os.makedirs(dir)

            self.FileName = os.path.join(dir, "alert-%s.dat" % makeUUID())
            output = open(self.FileName, "wb")
            pickle.dump(self, output)
        except Exception, ex:
            # to do: Exception handling
            print ex
            raise RuntimeError, str(ex)
    finally:
        if output:
            output.close()
    return
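# Hedged usage sketch (not part of the original module): reading back an alert
# persisted by save() above. It assumes only that save() pickled `self` into
# self.FileName; the helper name loadAlert is hypothetical.
def loadAlert(fileName):
    """Unpickle an alert object previously written by save()."""
    handle = open(fileName, "rb")
    try:
        return pickle.load(handle)
    finally:
        handle.close()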
def createCleanupJobSpec(workflowSpec, site, *lfns):
    """
    _createCleanupJob_

    Create a Cleanup JobSpec definition, using the cleanup workflow
    template, site name and the list of LFNs to be removed

    """
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), makeUUID())
    jobSpec.setJobName(jobName)
    jobSpec.setJobType("CleanUp")
    jobSpec.addWhitelistSite(site)

    lfnList = ""
    for lfn in lfns:
        lfnList += "%s\n" % lfn

    jobSpec.payload.configuration = lfnList
    return jobSpec
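# Hedged usage sketch (illustrative only): building and persisting a cleanup
# job spec. makeCleanupSpecFile, cleanupWorkflowSpec and targetDir are
# hypothetical names; jobSpec.save() is used the same way as in the merge
# job code further below, and the site/LFN values are made up.
def makeCleanupSpecFile(cleanupWorkflowSpec, site, lfns, targetDir="/tmp"):
    """Create a cleanup job spec for the given LFNs and save it to disk."""
    spec = createCleanupJobSpec(cleanupWorkflowSpec, site, *lfns)
    specFile = os.path.join(targetDir, "cleanup-%s-spec.xml" % makeUUID())
    spec.save(specFile)
    return specFile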
def newConvJobID():
    """
    _newConvJobID_

    Return a new conversion job ID

    """
    return "Conversion-%s" % makeUUID()
def newRepackMergeJobID(run):
    """
    _newRepackMergeJobID_

    Return a new repacker merge job ID associated with the given run

    """
    return "RepackerMerge-Run%s-%s" % (run, makeUUID())
def newRepackJobID(run):
    """
    _newRepackJobID_

    Return a new repacker job ID associated with the given run

    """
    return "Repacker-Run%s-%s" % (run, makeUUID())
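# Hedged demo (not in the original source): the three ID helpers above simply
# combine a fixed prefix, an optional run number and a fresh UUID. demoJobIDs
# is a hypothetical helper; the run number is made up.
def demoJobIDs(run=12345):
    """Print example IDs produced by the helpers above."""
    print newConvJobID()            # e.g. Conversion-<uuid>
    print newRepackJobID(run)       # e.g. Repacker-Run12345-<uuid>
    print newRepackMergeJobID(run)  # e.g. RepackerMerge-Run12345-<uuid>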
def createLogCollectorJobSpec(workflowSpec, originalWf, site, lfnBase,
                              stageOutParams, *lfns):
    """
    _createLogCollectorJobSpec_

    Create a LogCollect JobSpec definition, using the LogArchive workflow
    template, the site name and the list of log LFNs to be collected

    """
    jobSpec = workflowSpec.createJobSpec()
    jobName = "%s-%s" % (workflowSpec.workflowName(), makeUUID())
    jobSpec.setJobName(jobName)
    jobSpec.setJobType("LogCollect")
    jobSpec.addWhitelistSite(site)

    confNode = IMProvNode("LogCollectorConfig")

    # add site and workflow to collect
    confNode.addNode(IMProvNode("wf", originalWf))
    confNode.addNode(IMProvNode("se", site))
    confNode.addNode(IMProvNode("lfnBase", lfnBase))

    # add logs to collect
    logNode = IMProvNode("LogsToCollect")
    for lfn in lfns:
        logNode.addNode(IMProvNode("lfn", lfn))
    confNode.addNode(logNode)

    # stageout override
    if stageOutParams:
        stageOutNode = IMProvNode("Override")
        # WorkflowTools.addStageOutOverride(confNode, stageOutParams['command'],
        #                                   stageOutParams['option'],
        #                                   stageOutParams['se-name'],
        #                                   stageOutParams['lfnPrefix'])
        stageOutNode.addNode(IMProvNode("command", stageOutParams['command']))
        stageOutNode.addNode(IMProvNode("option", stageOutParams['option']))
        stageOutNode.addNode(IMProvNode("se-name", stageOutParams['se-name']))
        stageOutNode.addNode(IMProvNode("lfn-prefix", stageOutParams['lfnPrefix']))
        confNode.addNode(stageOutNode)

    #jobSpec.payload.configuration = logNode.makeDOMElement().toprettyxml()
    jobSpec.payload.configuration = confNode.makeDOMElement().toprettyxml()
    return jobSpec
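# Hedged usage sketch (illustrative only): the stageOutParams dictionary the
# function above indexes must provide these four keys. The values are made up;
# only the key names come from the code. logWorkflowSpec and logLFNs are
# hypothetical stand-ins for an already-loaded workflow spec and a log list.
exampleStageOutParams = {
    "command": "srmv2",
    "option": "",
    "se-name": "se.example.edu",
    "lfnPrefix": "srm://se.example.edu/store/logs",
}
# logSpec = createLogCollectorJobSpec(logWorkflowSpec, "OriginalWorkflow",
#                                     "se.example.edu", "/store/logs/base",
#                                     exampleStageOutParams, *logLFNs)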
def __init__(self, t0astFile=None, **args):
    """
    ___init___

    Initialize all attributes. If a T0ASTFile is passed as a parameter,
    create the block instance from it by extracting the necessary
    information and assigning it to the Block instance.
    """
    dict.__init__(self)
    self.setdefault("BLOCK_ID", None)
    self.setdefault("STATUS", "Active")
    self.setdefault("MIGRATE_STATUS", "NotMigrated")

    if t0astFile != None:
        # full path used in dbs
        self.setdefault("BLOCK_NAME", "/%s/%s/%s#%s" % (
                            t0astFile["PRIMARY_DATASET"],
                            t0astFile["PROCESSED_DATASET"],
                            t0astFile["DATA_TIER"],
                            makeUUID()))
        self.setdefault("RUN_ID", t0astFile.getRunID())
        self.setdefault("DATASET_ID", t0astFile["DATASET_ID"])
        self.setdefault("DATASET_PATH_ID", t0astFile.getDatasetPathID())
        self.setdefault("DATA_TIER", t0astFile["DATA_TIER"])
        self.setdefault("BLOCKSIZE", t0astFile["FILESIZE"])
        self.setdefault("FILECOUNT", 1)
    else:
        self.setdefault("BLOCK_NAME", None)
        self.setdefault("RUN_ID", None)
        self.setdefault("DATASET_ID", None)
        self.setdefault("DATASET_PATH_ID", None)
        self.setdefault("DATA_TIER", None)
        self.setdefault("BLOCKSIZE", 0)  # bytes
        self.setdefault("FILECOUNT", 0)

    self.update(args)
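# Hedged usage sketch (illustrative only), assuming the enclosing class is
# called Block as the docstring suggests. The field values below are made up;
# only the keys mirror the defaults set in __init__ above.
emptyBlock = Block()
seededBlock = Block(BLOCK_NAME="/PrimaryX/ProcessedY/RAW#<uuid>",
                    RUN_ID=1001,
                    DATA_TIER="RAW",
                    BLOCKSIZE=2000000,
                    FILECOUNT=1)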
def makeWorkflow(self):
    """
    _makeWorkflow_

    Call this method to create the workflow spec instance when done

    """
    self._Validate()

    #  //
    # // Add Stage Out node
    #//
    self.saveOutputFor.append(self.cmsRunNode.name)
    WorkflowTools.addStageOutNode(self.cmsRunNode, "stageOut1",
                                  *self.saveOutputFor)
    WorkflowTools.addLogArchNode(self.cmsRunNode, "logArchive")

    #  //
    # // Input Dataset?
    #//
    if self.inputDataset['IsUsed']:
        inputDataset = self.cmsRunNodes[0].addInputDataset(
            self.inputDataset['Primary'],
            self.inputDataset['Processed'])
        inputDataset["DataTier"] = self.inputDataset['DataTier']
        for keyname in ['SplitType', 'SplitSize', 'OnlySites',
                        'OnlyBlocks', 'OnlyClosedBlocks']:
            if self.inputDataset[keyname] != None:
                self.workflow.parameters[keyname] = self.inputDataset[keyname]

    #  //
    # // Pileup Datasets?
    #//
    for pileupDataset in self.pileupDatasets:
        puDataset = self.cmsRunNodes[0].addPileupDataset(
            pileupDataset['Primary'],
            pileupDataset['DataTier'],
            pileupDataset['Processed'])
        puDataset['FilesPerJob'] = pileupDataset['FilesPerJob']
        if pileupDataset['TargetModule'] is not None:
            puDataset['TargetModule'] = pileupDataset['TargetModule']

    #  //
    # // Extract dataset info from cfg
    #//
    datasets = {}
    datasetsToForward = {}
    for cmsRunNode, config in zip(self.cmsRunNodes, self.configurations):

        # Ignore nodes that don't save any output. But keep input dataset
        # in case we need to forward it.
        if cmsRunNode.name not in self.saveOutputFor:
            # Store parent dataset in case we need to forward it.
            if self.inputDataset['IsUsed'] and \
                    cmsRunNode == self.cmsRunNodes[0]:
                datasetsToForward[cmsRunNode.name] = \
                    self.inputDataset['DatasetName']
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    # If the previous cmsRunNode stages out, pull down the
                    # dataset it produced.
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    # If the previous cmsRunNode does not stage out, then
                    # use its parent.
                    else:
                        # TODO: Won't work if more than one InputLink exists
                        datasetsToForward[cmsRunNode.name] = \
                            datasetsToForward[inputLink['InputNode']]
            continue

        for outModName in config.outputModules.keys():
            moduleInstance = config.getOutputModule(outModName)
            dataTier = moduleInstance['dataTier']
            filterName = moduleInstance["filterName"]
            primaryName = DatasetConventions.primaryDatasetName(
                PhysicsChannel=self.channel)

            if self.useProperNamingConventions:
                if self.processingString and filterName:
                    processingString = "_".join((self.processingString,
                                                 filterName))
                elif self.processingString:
                    processingString = self.processingString
                elif filterName:
                    processingString = filterName
                else:
                    processingString = None
                processedName = DatasetConventions.properProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    ProcessingString=processingString,
                    ProcessingVersion=self.processingVersion,
                    Unmerged=True)
            elif self.acquisitionEra == None:
                processedName = DatasetConventions.processedDatasetName(
                    Version=cmsRunNode.application['Version'],
                    Label=self.label,
                    Group=self.group,
                    FilterName=filterName,
                    RequestId=self.requestId,
                    Unmerged=True)
            else:
                processedName = DatasetConventions.csa08ProcessedDatasetName(
                    AcquisitionEra=self.acquisitionEra,
                    Conditions=self.workflow.parameters['Conditions'],
                    ProcessingVersion=self.workflow.parameters['ProcessingVersion'],
                    FilterName=filterName,
                    Unmerged=True)

            dataTier = DatasetConventions.checkDataTier(dataTier)

            moduleInstance['primaryDataset'] = primaryName
            moduleInstance['processedDataset'] = processedName

            outDS = cmsRunNode.addOutputDataset(primaryName,
                                                processedName,
                                                outModName)

            outDS['Status'] = self.outputDatasetStatus
            outDS['DataTier'] = dataTier
            outDS["ApplicationName"] = cmsRunNode.application["Executable"]
            outDS["ApplicationFamily"] = outModName
            outDS["PhysicsGroup"] = self.group

            # check for input dataset for first node
            if self.inputDataset['IsUsed'] and cmsRunNode == self.cmsRunNodes[0]:
                outDS['ParentDataset'] = self.inputDataset['DatasetName']
            # check for staged out intermediates
            elif cmsRunNode != self.cmsRunNodes[0]:
                for inputLink in cmsRunNode._InputLinks:
                    if not inputLink["AppearStandalone"]:
                        # TODO: Won't work if more than one InputLink exists
                        outDS['ParentDataset'] = \
                            datasets['%s:%s' % (inputLink['InputNode'],
                                                inputLink['OutputModule'])]
                    elif datasetsToForward.get(inputLink['InputNode']) is not None:
                        outDS['ParentDataset'] = \
                            datasetsToForward[inputLink['InputNode']]

            if self.options['FakeHash']:
                guid = makeUUID()
                outDS['PSetHash'] = "hash=%s;guid=%s" % (
                    self.psetHashes[cmsRunNode.name], guid)
            else:
                outDS['PSetHash'] = self.psetHashes[cmsRunNode.name]

            # record output in case used as input to a later node
            datasets['%s:%s' % (cmsRunNode.name, outModName)] = \
                "/%s/%s/%s" % (outDS['PrimaryDataset'],
                               outDS['ProcessedDataset'],
                               outDS['DataTier'])

    # optionally remap sibling relationships to parent-child (i.e. HLTDEBUG)
    remapParentageForWorkflow(self.workflow)
    WorkflowTools.generateFilenames(self.workflow)

    return self.workflow
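# Hedged standalone sketch (not in the original): the processing-string rule
# used above when proper naming conventions are enabled, extracted for clarity.
# buildProcessingString is a hypothetical helper mirroring that logic.
def buildProcessingString(processingString, filterName):
    """Combine the workflow processing string and the output module filter name."""
    if processingString and filterName:
        return "_".join((processingString, filterName))
    elif processingString:
        return processingString
    elif filterName:
        return filterName
    return None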
def createSuccessReport(self, jobSpecLoaded, workerNodeInfo, reportFilePath):
    """
    _createSuccessReport_

    Create a job report representing the successful completion
    of a job.

    The jobSpecLoaded parameter is a reference to an instance
    of the JobSpec class that has been initialized with the
    job spec that we are generating a report for.

    """
    jobSpecPayload, newReport = \
        self.__fwkJobReportCommon(jobSpecLoaded, workerNodeInfo)
    newReport.exitCode = 0
    newReport.status = "Success"

    if "jobId" in jobSpecLoaded.parameters.keys():
        newReport.jobSpecId = jobSpecLoaded.parameters["jobId"]

    # Create a list of datasets from the JobSpec,
    # then associate files to these later on
    datasets = getOutputDatasetDetails(jobSpecPayload)
    datasets.extend(getSizeBasedMergeDatasetsFromNode(jobSpecPayload))

    outModules = jobSpecPayload.cfgInterface.outputModules
    inputFiles = jobSpecPayload.cfgInterface.inputFiles

    for dataset in datasets:
        modName = dataset.get('OutputModuleName', None)
        if outModules.has_key(modName):
            dataset['LFNBase'] = outModules[modName].get('LFNBase', None)
            self.setDefaultForNoneValue('LFNBase', dataset['LFNBase'])
            dataset['MergedLFNBase'] = \
                outModules[modName].get('MergedLFNBase', None)

    datasetMap = {}
    for dataset in datasets:
        datasetMap[dataset['OutputModuleName']] = dataset

    for outName, outMod in \
            jobSpecPayload.cfgInterface.outputModules.items():
        theFile = newReport.newFile()
        guid = makeUUID()

        if outMod.has_key("LFNBase"):
            theFile['LFN'] = "%s/%s.root" % (outMod['LFNBase'], guid)
        else:
            theFile['LFN'] = "/some/madeup/path/%s.root" % guid
        self.setDefaultForNoneValue('LFNBase', theFile['LFN'])

        theFile['PFN'] = "fakefile:%s" % theFile['LFN']
        theFile['GUID'] = guid
        theFile['MergedBySize'] = choice(["True", "False"])
        theFile['ModuleLabel'] = outName
        # basic unit is bytes (minimum 4 MB, max 4 GB)
        theFile['Size'] = 4000000 * randrange(1, 1000)

        runNum = jobSpecLoaded.parameters["RunNumber"]
        # need to get lumi
        lumiList = jobSpecLoaded.parameters.get("LumiSections", [])
        theFile.runs[runNum] = RunInfo(runNum, *lumiList)

        # check if maxEvents['output'] is set; if not, use maxEvents['input']
        totalEvent = jobSpecPayload.cfgInterface.maxEvents['output']
        if totalEvent == None:
            totalEvent = jobSpecPayload.cfgInterface.maxEvents['input']
        # if there is neither input nor output, log an error and
        # fall back to the default (100)
        totalEvent = self.setDefaultForNoneValue(
            "maxEvent['input' and 'output']", totalEvent, 100)
        try:
            totalEvent = int(totalEvent)
        except ValueError, ex:
            logging.error("totalEvent is not a number. \n%s" % ex)

        # event count should be >= 0
        # totalEvent of -1 means process all events
        if totalEvent < 0:
            totalEvent = 200

        if random() > self.avgEventProcessingRate:
            # Gaussian distribution of totalEvent
            meanEvent = int(totalEvent * 0.7)
            stdDev = totalEvent * 0.15
            tempTotalEvent = int(gauss(meanEvent, stdDev))
            if tempTotalEvent <= 0:
                totalEvent = 1
            elif tempTotalEvent >= totalEvent:
                totalEvent = totalEvent - 1
            else:
                totalEvent = tempTotalEvent

        #logging.debug("---------- Total Event ----------: %s \n" % totalEvent)
        theFile['TotalEvents'] = totalEvent

        theFile['SEName'] = workerNodeInfo['se-name']
        theFile['CEname'] = workerNodeInfo['ce-name']
        theFile['Catalog'] = outMod['catalog']
        theFile['Stream'] = outMod['stream']
        theFile['OutputModuleClass'] = "PoolOutputModule"

        theFile.addChecksum("cksum", randrange(1000000, 10000000))
        theFile.branches.extend(["fakeBranch_%d-%s.Rec" % (num, guid)
                                 for num in range(randrange(5, 20))])
        #theFile.load(theFile.save())
        theFile["BranchHash"] = randrange(2000000, 30000000)

        [theFile.addInputFile("fakefile:%s" % x, "%s" % x)
         for x in inputFiles]

        if datasetMap.has_key(outName):
            datasetForFile = theFile.newDataset()
            datasetForFile.update(datasetMap[outName])
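# Hedged standalone sketch (not in the original): the event-count smearing
# used above, pulled out as a helper for clarity. It mirrors the Gaussian
# fallback with mean 0.7 * total and sigma 0.15 * total, clamped to
# [1, total - 1]. smearEventCount is a hypothetical name.
from random import gauss

def smearEventCount(totalEvent):
    """Return a randomly reduced event count, as in createSuccessReport."""
    meanEvent = int(totalEvent * 0.7)
    stdDev = totalEvent * 0.15
    tempTotalEvent = int(gauss(meanEvent, stdDev))
    if tempTotalEvent <= 0:
        return 1
    elif tempTotalEvent >= totalEvent:
        return totalEvent - 1
    return tempTotalEvent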
def __call__(self, *fileList):
    jobSpec = self.spec.createJobSpec()
    jobId = "%s-%s" % (self.spec.workflowName(), self.count)
    jobSpec.setJobName(jobId)
    jobSpec.setJobType("Merge")
    jobSpec.addWhitelistSite("storage.element.edu")

    # get PSet
    cfg = jobSpec.payload.cfgInterface

    # set output module
    #print jobSpec.payload

    # set output file name
    prim = self.dataset['PrimaryDataset']
    tier = self.dataset['DataTier']
    lastBit = self.dataset['ProcessedDataset']

    acqEra = None
    #if .has_key("AcquisitionEra"):
    acqEra = jobSpec.parameters.get("AcquisitionEra", None)

    # compute LFN group based on merge jobs counter
    group = str(self.count // 1000).zfill(4)
    jobSpec.parameters['RunNumber'] = self.spec.workflowRunNumber()

    remainingBits = lastBit
    if acqEra != None:
        thingtoStrip = "%s_" % acqEra
        mypieces = lastBit.split(thingtoStrip, 1)
        if len(mypieces) > 1:
            remainingBits = mypieces[1].split("-unmerged", 1)[0]
        else:
            remainingBits = lastBit

    outModule = cfg.outputModules['Merged']
    lfnBase = outModule['LFNBase']
    extendedlfnBase = os.path.join(lfnBase, prim, tier, remainingBits, group)
    baseFileName = "%s.root" % makeUUID()
    outModule['fileName'] = baseFileName
    outModule['logicalFileName'] = os.path.join(extendedlfnBase, baseFileName)

    # set output catalog
    outModule['catalog'] = "%s-merge.xml" % jobId

    # set input module
    # get input file names (expects a trivial catalog on site)
    cfg.inputFiles = ["%s" % fileName for fileName in fileList]

    # target file name
    mergeJobSpecFile = "%s/%s-spec.xml" % (self.dir, jobId)

    # save job specification
    jobSpec.save(mergeJobSpecFile)

    self.count += 1
    return jobSpec
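# Hedged standalone sketch (not in the original): the processed-dataset
# trimming used above, extracted for clarity. Given an acquisition era it
# strips the "<era>_" prefix and the trailing "-unmerged" suffix.
# stripAcquisitionEra is a hypothetical name.
def stripAcquisitionEra(processedDataset, acqEra):
    """Return the dataset remainder used in the merged LFN path."""
    if acqEra is None:
        return processedDataset
    pieces = processedDataset.split("%s_" % acqEra, 1)
    if len(pieces) > 1:
        return pieces[1].split("-unmerged", 1)[0]
    return processedDataset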
def createSuccessReport(self, jobSpecLoaded, workerNodeInfo, reportFilePath):
    """
    _createSuccessReport_

    Create a job report representing the successful completion
    of a job.

    The jobSpecLoaded parameter is a reference to an instance
    of the JobSpec class that has been initialized with the
    job spec that we are generating a report for.

    """
    jobSpecPayload, newReport = \
        self.__fwkJobReportCommon(jobSpecLoaded, workerNodeInfo)
    newReport.exitCode = 0
    newReport.status = "Success"

    # parse newReport.jobSpecId (it should contain the job name:
    # "Repack-Run%s-%s", "RepackMerge-Run%s-%s" or "PromptReco-Run%s-%s")
    specIDParts = newReport.jobSpecId.split('-')
    tier0JobType = None
    if len(specIDParts) != 3:
        logging.debug(
            "JobReport jobSpecID not in correct format for tier 0: %s"
            % newReport.jobSpecId)
    else:
        # Job type should be one of "Repack", "RepackMerge", "PromptReco"
        tier0JobType = specIDParts[0].strip()

    if "jobId" in jobSpecLoaded.parameters.keys():
        newReport.jobSpecId = jobSpecLoaded.parameters["jobId"]

    # Create a list of datasets from the JobSpec,
    # then associate files to these later on
    datasets = getOutputDatasetDetails(jobSpecPayload)
    datasets.extend(getSizeBasedMergeDatasetsFromNode(jobSpecPayload))

    outModules = jobSpecPayload.cfgInterface.outputModules
    inputFiles = jobSpecPayload.cfgInterface.inputFiles

    for dataset in datasets:
        modName = dataset.get('OutputModuleName', None)
        if outModules.has_key(modName):
            dataset['LFNBase'] = outModules[modName].get('LFNBase', None)
            self.setDefaultForNoneValue('LFNBase', dataset['LFNBase'])
            dataset['MergedLFNBase'] = \
                outModules[modName].get('MergedLFNBase', None)

    datasetMap = {}
    for dataset in datasets:
        datasetMap[dataset['OutputModuleName']] = dataset

    for outName, outMod in \
            jobSpecPayload.cfgInterface.outputModules.items():
        theFile = newReport.newFile()
        guid = makeUUID()

        theFile['GUID'] = guid
        theFile['ModuleLabel'] = outName

        runNum = jobSpecLoaded.parameters["RunNumber"]
        # need to get lumi
        lumiList = jobSpecLoaded.parameters.get("LumiSections", [])
        theFile.runs[runNum] = RunInfo(runNum, *lumiList)

        # check if maxEvents['output'] is set; if not, use maxEvents['input']
        totalEvent = jobSpecPayload.cfgInterface.maxEvents['output']
        if totalEvent == None:
            totalEvent = jobSpecPayload.cfgInterface.maxEvents['input']
        # if there is neither input nor output, log an error and
        # fall back to the default (100)
        totalEvent = self.setDefaultForNoneValue(
            "maxEvent['input' and 'output']", totalEvent, 100)
        try:
            totalEvent = int(totalEvent)
        except ValueError, ex:
            logging.error("totalEvent is not a number. \n%s" % ex)

        if random() > self.avgEventProcessingRate:
            # Gaussian distribution of totalEvent
            meanEvent = int(totalEvent * 0.7)
            stdDev = totalEvent * 0.15
            tempTotalEvent = int(gauss(meanEvent, stdDev))
            if tempTotalEvent <= 0:
                totalEvent = 1
            elif tempTotalEvent >= totalEvent:
                totalEvent = totalEvent - 1
            else:
                totalEvent = tempTotalEvent

        #logging.debug("---------- Total Event ----------: %s \n" % totalEvent)
        theFile['TotalEvents'] = totalEvent

        theFile['SEName'] = workerNodeInfo['se-name']
        theFile['CEname'] = workerNodeInfo['ce-name']
        theFile['Catalog'] = outMod['catalog']
        theFile['Stream'] = outMod['stream']
        theFile['OutputModuleClass'] = "PoolOutputModule"

        theFile.addChecksum("cksum", randrange(1000000, 10000000))
        theFile.branches.extend(["fakeBranch_%d-%s.Rec" % (num, guid)
                                 for num in range(randrange(5, 20))])
        #theFile.load(theFile.save())
        theFile["BranchHash"] = randrange(2000000, 30000000)

        [theFile.addInputFile("fakefile:%s" % x, "%s" % x)
         for x in inputFiles]

        if datasetMap.has_key(outName):
            datasetForFile = theFile.newDataset()
            datasetForFile.update(datasetMap[outName])

        # basic unit is bytes (minimum 4 MB, max 4 GB)
        # default value for the file size:
        # it is overridden below if the primary dataset exists
        theFile['Size'] = 4000000 * randrange(1, 1000)  # random size
        theFile['MergedBySize'] = choice(["True", "False"])

        # set up a default LFN
        if outMod.has_key("LFNBase"):
            theFile['LFN'] = "%s%s.root" % (outMod['LFNBase'], guid)
        else:
            theFile['LFN'] = "/some/madeup/path/%s.root" % guid
        self.setDefaultForNoneValue('LFNBase', theFile['LFN'])

        if tier0JobType == "Repack":
            # parse dataset name and set the size according to the threshold
            if len(theFile.dataset) == 0:
                continue
            datasetNameParts = theFile.dataset[0]["PrimaryDataset"].split('_')
            # need to add sanity check
            if self.thresholdForMerge > int(datasetNameParts[2]):
                theFile['Size'] = 500000000  # (500 MB)
                theFile['MergedBySize'] = "False"
            else:
                theFile['Size'] = 4000000000  # (4 GB)
                theFile['MergedBySize'] = "True"
                # override LFN for merged file
                theFile['LFN'] = "%s%s.root" % (outMod['MergedLFNBase'], guid)
        elif tier0JobType == "RepackMerge":
            theFile['Size'] = 4000000000  # (4 GB)
            theFile['MergedBySize'] = "True"
        elif tier0JobType == "PromptReco":
            theFile['Size'] = 2000000000  # (2 GB)
        else:
            theFile['Size'] = 4000000 * randrange(1, 1000)  # random size

        theFile['PFN'] = "fakefile:%s" % theFile['LFN']
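# Hedged standalone sketch (not in the original): the per-job-type fake file
# sizes assigned above, summarized as a helper. The threshold-dependent Repack
# sizing is omitted because it also depends on the dataset name.
# fakeFileSize is a hypothetical name.
from random import randrange

def fakeFileSize(tier0JobType):
    """Return the simulated file size in bytes for a Tier-0 job type."""
    if tier0JobType == "RepackMerge":
        return 4000000000   # 4 GB
    elif tier0JobType == "PromptReco":
        return 2000000000   # 2 GB
    return 4000000 * randrange(1, 1000)  # random size, 4 MB to ~4 GB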