def addTier1Skim(config, skimName, dataTier, primaryDataset, cmsswVersion,
                 processingVersion, configURL, globalTag, twoFileRead=False,
                 skimNode=None):
    """
    _addTier1Skim_

    Add the configuration of a skim that is to be run over a particular
    primary dataset and data tier at a particular site to the Tier0
    configuration.  The skims will be launched as blocks are transferred
    to the site.  The site name must correspond to the site name in the
    ProdAgent JobQueue.
    """
    datasetConfig = config.Datasets.section_(primaryDataset)

    skimConfig = ConfigSection(name="SomeTier1Skim")
    skimConfig.PrimaryDataset = primaryDataset
    skimConfig.DataTier = dataTier
    skimConfig.SkimName = skimName
    skimConfig.CMSSWVersion = cmsswVersion
    skimConfig.ConfigURL = configURL
    skimConfig.GlobalTag = globalTag
    skimConfig.ProcessingVersion = processingVersion
    skimConfig.TwoFileRead = twoFileRead
    skimConfig.Node = skimNode

    datasetConfig.Tier1Skims.append(skimConfig)
    return
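A minimal usage sketch of the function above. All values are illustrative, and it assumes a Tier0 configuration object whose Datasets section already defines the primary dataset with a Tier1Skims list:

# Hypothetical example: register a RECO skim for the MinimumBias dataset.
# Assumes `config` was built by the Tier0 configuration machinery and that
# config.Datasets.MinimumBias.Tier1Skims already exists as a list.
addTier1Skim(config,
             skimName="SkimMinBias",
             dataTier="RECO",
             primaryDataset="MinimumBias",
             cmsswVersion="CMSSW_5_2_5",
             processingVersion="PromptSkim-v1",
             configURL="http://example.invalid/skim_config.py",
             globalTag="GR_P_V32::All",
             twoFileRead=True)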
def __init__(self, name):
    ConfigSection.__init__(self, name)
    self._internal_treetop = False
    self.section_("tree")
    self.tree.section_("children")
    self.tree.childNames = []
    self.tree.parent = None
def testForwardSinkBasic(self):
    config = ConfigSection("forward")
    # address of the Processor or Receiver to forward Alerts to
    config.address = self.address1
    config.controlAddr = self.controlAddr1
    config.label = "ForwardSinkTest"
    forwarder = ForwardSink(config)
def testF(self):
    """
    Test internal functions pythonise_, listSections_
    """
    config = ConfigSection("config")
    config.section_("SectionA")
    config.section_("SectionB")
    config.SectionA.section_("Section1")
    config.SectionA.section_("Section2")
    config.SectionA.Section1.x = 100
    config.SectionA.Section1.y = 100

    pythonise = config.pythonise_()
    assert "config.section_('SectionA')" in pythonise, "Pythonise failed: Could not find SectionA"
    assert "config.SectionA.Section1.x = 100" in pythonise, "Pythonise failed: Could not find x"

    pythonise = config.SectionA.pythonise_()
    assert "SectionA.section_('Section1')" in pythonise, "Pythonise failed: Could not find Section1"
    assert "SectionA.Section1.x = 100" in pythonise, "Pythonise failed: Could not find x"

    self.assertEqual(config.listSections_(), ['SectionB', 'SectionA'])
    self.assertEqual(config.SectionA.listSections_(), ['Section2', 'Section1'])
def reportWorkflowToDashboard(self, dashboardActivity):
    """
    _reportWorkflowToDashboard_

    Gathers workflow information from the arguments and reports it to the
    dashboard.
    """
    try:
        # Create a fake config
        conf = ConfigSection()
        conf.section_('DashboardReporter')
        conf.DashboardReporter.dashboardHost = self.dashboardHost
        conf.DashboardReporter.dashboardPort = self.dashboardPort

        # Create the reporter
        reporter = DashboardReporter(conf)

        # Assemble the info
        workflow = {}
        workflow['name'] = self.workloadName
        workflow['application'] = self.frameworkVersion
        workflow['TaskType'] = dashboardActivity

        # Let's try to build information about the inputDataset
        dataset = 'DoesNotApply'
        if hasattr(self, 'inputDataset'):
            dataset = self.inputDataset
        workflow['datasetFull'] = dataset
        workflow['user'] = '******'

        # Send the workflow info
        reporter.addTask(workflow)
    except Exception:
        # This is not critical, if it fails just leave it be
        logging.error("There was an error with dashboard reporting")
def __init__(self, reportname=None):
    self.data = ConfigSection("FrameworkJobReport")
    self.data.steps = []
    self.data.workload = "Unknown"

    if reportname:
        self.addStep(reportname=reportname)
    return
def testH_ConfigSectionDictionariseInternalChildren(self):
    """
    The test checks that no item in the dictionary_whole_tree_() result
    is an unexpanded ConfigSection instance.
    """
    config = ConfigSection("config")
    config.value1 = "MyValue1"
    config.section_("Task1")
    config.Task1.value2 = "MyValue2"
    config.Task1.section_("subSection")
    config.Task1.subSection.value3 = "MyValue3"

    d = config.dictionary_whole_tree_()
    for values in d.values():
        self.assertFalse(isinstance(values, ConfigSection))
    self.assertEqual(d["Task1"]["subSection"]["value3"], "MyValue3")
def testPSetFixup(self):
    """
    _testPSetFixup_

    Verify that all necessary parameters are set in the PSet.
    """
    from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset

    if os.environ.get('CMSSW_VERSION', None):
        del os.environ['CMSSW_VERSION']

    setupScript = SetupCMSSWPset()
    setupScript.step = self.createTestStep()
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = self.testDir
    setupScript.job = self.createTestJob()
    setupScript()

    fixedPSet = self.loadProcessFromPSet()

    self.assertEqual(len(fixedPSet.source.fileNames.value), 2,
                     "Error: Wrong number of files.")
    self.assertEqual(len(fixedPSet.source.secondaryFileNames.value), 2,
                     "Error: Wrong number of secondary files.")
    self.assertEqual(fixedPSet.source.fileNames.value[0], "/some/file/one",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.source.fileNames.value[1], "/some/file/two",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.source.secondaryFileNames.value[0], "/some/parent/one",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.source.secondaryFileNames.value[1], "/some/parent/two",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.maxEvents.input.value, -1,
                     "Error: Wrong maxEvents.")
def __init__(self, lfnBase, outputMods):
    Configuration.__init__(self)
    for out in outputMods:
        setattr(self, out, ConfigSection("output"))
        getattr(self, out)._internal_name = "output"
        getattr(self, out).lfnBase = lfnBase  # '/store/temp/user/mmascher/RelValProdTTbar/mc/v6'
    StepConfiguration.outputMods = outputMods
def testEventsPerLumi(self):
    """
    _testEventsPerLumi_

    Verify that you can put in events per lumi in the process.
    """
    from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset
    setupScript = SetupCMSSWPset()
    setupScript.step = self.createTestStep()
    setupScript.step.setEventsPerLumi(500)
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = self.testDir
    setupScript.job = self.createTestJob()
    setupScript()

    fixedPSet = self.loadProcessFromPSet()

    self.assertEqual(len(fixedPSet.source.fileNames.value), 2,
                     "Error: Wrong number of files.")
    self.assertEqual(len(fixedPSet.source.secondaryFileNames.value), 2,
                     "Error: Wrong number of secondary files.")
    self.assertEqual(fixedPSet.source.fileNames.value[0], "/some/file/one",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.source.fileNames.value[1], "/some/file/two",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.source.secondaryFileNames.value[0], "/some/parent/one",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.source.secondaryFileNames.value[1], "/some/parent/two",
                     "Error: Wrong input file.")
    self.assertEqual(fixedPSet.source.numberEventsInLuminosityBlock.value, 500,
                     "Error: Wrong number of events per luminosity block")
    self.assertEqual(fixedPSet.maxEvents.input.value, -1,
                     "Error: Wrong maxEvents.")
def __init__(self, name=None, files=None):
    """
    A job has a jobgroup which gives it its subscription and workflow.
    inputFiles is a list containing the files associated to the job.
    last_update is the time the job last changed.
    """
    dict.__init__(self)
    self.baggage = ConfigSection("baggage")
    if files is None:
        self["input_files"] = []
    else:
        self["input_files"] = files
    self["id"] = None
    self["jobgroup"] = None
    self["name"] = name
    self["state"] = 'new'
    self["state_time"] = int(time.time())
    self["outcome"] = "failure"
    self["retry_count"] = 0
    self["location"] = None
    self["mask"] = Mask()
    self["task"] = None
    self["fwjr"] = None
    self["fwjr_path"] = None
    self["workflow"] = None
    self["owner"] = None
    return
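A small hypothetical sketch of how such a Job object is used; the import path assumes this constructor is WMCore's DataStructs Job, and the names and values below are made up:

# Hypothetical usage: the baggage ConfigSection carries arbitrary per-job
# metadata alongside the dict-style bookkeeping keys set above.
from WMCore.DataStructs.Job import Job

job = Job(name="ProcessingJob-1", files=["/store/data/file1.root"])
job["location"] = "T1_US_FNAL"
job.baggage.section_("pileup")  # stash extra settings on the baggage
job.baggage.pileup.dataset = "/Some/PileUp/Dataset"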
def testCheckLumiInformation(self):
    """
    _testCheckLumiInformation_

    Test the function that checks if all files have run lumi information
    """
    myReport = Report("cmsRun1")
    myReport.parse(self.xmlPath)
    myReport.checkForRunLumiInformation(stepName="cmsRun1")
    self.assertNotEqual(myReport.getExitCode(), 70452)

    # Remove the lumi information on purpose
    myReport2 = Report("cmsRun1")
    myReport2.parse(self.xmlPath)
    fRefs = myReport2.getAllFileRefsFromStep(step="cmsRun1")
    for fRef in fRefs:
        fRef.runs = ConfigSection()
    myReport2.checkForRunLumiInformation(stepName="cmsRun1")
    self.assertFalse(myReport2.stepSuccessful(stepName="cmsRun1"))
    self.assertEqual(myReport2.getExitCode(), 70452)
    return
def setUp(self):
    self.testInit = TestInit(__file__)
    self.testInit.setLogging(logLevel=logging.DEBUG)
    self.testDir = self.testInit.generateWorkDir()
    self.config = ConfigSection("file")
    self.config.outputfile = os.path.join(self.testDir, "FileSinkTestNew.json")
def testChainedProcesing(self):
    """
    Test chained CMSSW processing - check the overridden TFC, its values,
    and that the input files are set correctly.
    """
    from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset

    os.environ['CMSSW_VERSION'] = "CMSSW_7_6_0"
    setupScript = SetupCMSSWPset()
    setupScript.step = self.createTestStep()
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = self.testDir
    setupScript.job = self.createTestJob()
    setupScript.step.setupChainedProcessing("my_first_step", "my_input_module")
    setupScript()

    # test if the overridden TFC is right
    self.assertTrue(hasattr(setupScript.step.data.application, "overrideCatalog"),
                    "Error: overridden TFC was not set")
    tfc = loadTFC(setupScript.step.data.application.overrideCatalog)

    inputFile = "../my_first_step/my_input_module.root"
    self.assertEqual(tfc.matchPFN("direct", inputFile), inputFile)
    self.assertEqual(tfc.matchLFN("direct", inputFile), inputFile)
    self.assertEqual(setupScript.process.source.fileNames.value, [inputFile])
def testChainedProcesing(self):
    """
    Test chained CMSSW processing - check the overridden TFC, its values,
    and that the input files are set correctly.
    """
    from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset
    setupScript = SetupCMSSWPset()
    setupScript.step = self.createTestStep()
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = self.testDir
    shutil.copyfile(os.path.join(os.path.dirname(__file__), "WMTaskSpace", "cmsRun1", "PSet.py"),
                    os.path.join(setupScript.stepSpace.location, "PSet.py"))
    setupScript.job = self.createTestJob()
    setupScript.step.setupChainedProcessing("my_first_step", "my_input_module")
    setupScript()

    fixedPSet = self.loadProcessFromPSet(setupScript.stepSpace.location)

    # test if the overridden TFC is right
    print("DEBUG override: {0}".format(setupScript.step.data.application.overrideCatalog))
    self.assertTrue(hasattr(setupScript.step.data.application, "overrideCatalog"),
                    "Error: overridden TFC was not set")
    tfc = loadTFC(setupScript.step.data.application.overrideCatalog)

    inputFile = "../my_first_step/my_input_module.root"
    self.assertEqual(tfc.matchPFN("direct", inputFile), inputFile)
    self.assertEqual(tfc.matchLFN("direct", inputFile), inputFile)
    self.assertTrue(hasattr(fixedPSet.source, 'fileNames'))
def testEventsPerLumi(self):
    """
    _testEventsPerLumi_

    Verify that you can put in events per lumi in the process.
    """
    from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset
    setupScript = SetupCMSSWPset()
    setupScript.step = self.createTestStep()
    setupScript.step.setEventsPerLumi(500)
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = self.testDir
    shutil.copyfile(os.path.join(os.path.dirname(__file__), "WMTaskSpace", "cmsRun1", "PSet.py"),
                    os.path.join(setupScript.stepSpace.location, "PSet.py"))
    setupScript.job = self.createTestJob()
    setupScript()

    fixedPSet = self.loadProcessFromPSet(setupScript.stepSpace.location)

    self.assertTrue(hasattr(fixedPSet.source, 'fileNames'))
    self.assertTrue(hasattr(fixedPSet.source, 'secondaryFileNames'))
    self.assertEqual(fixedPSet.source.numberEventsInLuminosityBlock._value, 500,
                     "Error: Wrong number of events per luminosity block")
    self.assertEqual(fixedPSet.maxEvents.input._value, -1,
                     "Error: Wrong maxEvents.")
def testPSetFixup(self):
    """
    _testPSetFixup_

    Verify that all necessary parameters are set in the PSet.
    """
    from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset
    setupScript = SetupCMSSWPset()
    setupScript.step = self.createTestStep()
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = self.testDir
    shutil.copyfile(os.path.join(os.path.dirname(__file__), "WMTaskSpace", "cmsRun1", "PSet.py"),
                    os.path.join(setupScript.stepSpace.location, "PSet.py"))
    setupScript.job = self.createTestJob()
    setupScript()

    fixedPSet = self.loadProcessFromPSet(setupScript.stepSpace.location)

    self.assertTrue(hasattr(fixedPSet.source, 'fileNames'))
    self.assertTrue(hasattr(fixedPSet.source, 'secondaryFileNames'))
    self.assertEqual(fixedPSet.maxEvents.input._value, -1,
                     "Error: Wrong maxEvents.")
def setUp(self):
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    dbName = "alerts-rest_sink"
    self.testInit.setupCouch(dbName)
    self.config = ConfigSection("rest")
    self.config.uri = self.testInit.couchUrl + "/" + dbName
def __init__(self, name="test"): ConfigSection.__init__(self, name) self.objectType = self.__class__.__name__ # //persistent data # // #// self.section_("persistency") self.persistency.specUrl = None # // # // request related information #// self.section_("request") self.request.priority = None # what should be the default value # // # // owner related information #// self.section_("owner") # // # // Policies applied to this workload by the processing system #// self.section_("policies") self.policies.section_("start") self.policies.section_("end") self.policies.start.policyName = None self.policies.end.policyName = None # // # // properties of the Workload and all tasks there-in #// self.section_("properties") self.properties.acquisitionEra = None self.properties.processingVersion = None self.properties.unmergedLFNBase = "/store/unmerged" self.properties.mergedLFNBase = "/store/data" self.properties.dashboardActivity = None # // # // tasks #// self.section_("tasks") self.tasks.tasklist = [] self.sandbox = None self.initialJobCount = 0
def setUp(self):
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    dbName = "couch_sink"
    self.testInit.setupCouch(dbName)
    self.config = ConfigSection("couch")
    self.config.url = self.testInit.couchUrl
    self.config.database = self.testInit.couchDbName
def setupPromptSkimConfigObject(self):
    """
    _setupPromptSkimConfigObject_

    Creates a custom config object for testing of the skim functionality
    """
    self.promptSkim = ConfigSection(name="Tier1Skim")
    self.promptSkim.SkimName = "TestSkim1"
    self.promptSkim.DataTier = "RECO"
    self.promptSkim.TwoFileRead = False
    self.promptSkim.ProcessingVersion = "PromptSkim-v1"
    self.promptSkim.ConfigURL = "http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi/CMSSW/Configuration/DataOps/python/prescaleskimmer.py?revision=1.1"
def getOutputModulesForStep(self, stepName):
    """
    _getOutputModulesForStep_

    Retrieve all the output modules for the particular step.
    """
    step = self.getStep(stepName)
    if hasattr(step.data, "output"):
        if hasattr(step.data.output, "modules"):
            return step.data.output.modules
    return ConfigSection()
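Returning an empty ConfigSection instead of None lets callers iterate unconditionally. A hedged sketch, assuming `task` is the task-helper object this method is defined on and the step is named "cmsRun1":

# Because a missing output section yields an empty ConfigSection, the
# listSections_() call below simply returns [] and the loop is a no-op.
modules = task.getOutputModulesForStep("cmsRun1")
for moduleName in modules.listSections_():
    print("output module:", moduleName)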
def reportWorkflowToDashboard(self, dashboardActivity):
    """
    _reportWorkflowToDashboard_

    Gathers workflow information from the arguments and reports it to the
    dashboard.
    """
    try:
        # Create a fake config
        conf = ConfigSection()
        conf.section_("DashboardReporter")
        conf.DashboardReporter.dashboardHost = self.dashboardHost
        conf.DashboardReporter.dashboardPort = self.dashboardPort

        # Create the reporter
        reporter = DashboardReporter(conf)

        # Assemble the info
        workflow = {}
        workflow["name"] = self.workloadName
        workflow["application"] = self.frameworkVersion
        workflow["scheduler"] = "BossAir"
        workflow["TaskType"] = dashboardActivity

        # Let's try to build information about the inputDataset
        dataset = "DoesNotApply"
        if hasattr(self, "inputDataset"):
            dataset = self.inputDataset
        workflow["datasetFull"] = dataset
        workflow["user"] = "******"

        # These two are not reported for now
        workflow["GridName"] = "NotAvailable"
        workflow["nevtJob"] = "NotAvailable"

        # Send the workflow info
        reporter.addTask(workflow)
    except Exception:
        # This is not critical, if it fails just leave it be
        logging.error("There was an error with dashboard reporting")
def setUp(self):
    self.config = ConfigSection("email")
    self.config.fromAddr = "*****@*****.**"
    self.config.toAddr = ["*****@*****.**", "*****@*****.**"]
    self.config.smtpServer = "smtp.gov"
    self.config.smtpUser = None
    self.config.smtpPass = None

    # now we want to mock the smtp emailing stuff - via pymox - so no
    # actual email sending happens
    self.mox = mox.Mox()
    self.smtpReal = EmailSinkMod.smtplib
    EmailSinkMod.smtplib = self.mox.CreateMock(EmailSinkMod.smtplib)
    self.smtp = self.mox.CreateMockAnything()
def __init__(self, location, inputFiles, runAndLumis, agentNumber, lfnBase,
             outputMods, firstEvent=0, lastEvent=-1, firstLumi=None,
             firstRun=None, seeding=None, lheInputFiles=False,
             oneEventMode=False, eventsPerLumi=None, maxRuntime=None):
    SetupCMSSWPset.__init__(self, crabPSet=True)
    self.stepSpace = ConfigSection()
    self.stepSpace.location = location
    self.step = StepConfiguration(lfnBase, outputMods)
    self.step.section_("data")
    self.step.data._internal_name = "cmsRun"
    self.step.data.section_("application")
    self.step.data.application.section_("configuration")
    self.step.data.application.section_("command")
    self.step.data.application.section_("multicore")
    self.step.data.application.command.configuration = "PSet.py"
    self.step.data.application.command.oneEventMode = oneEventMode in ["1", "True", True]
    self.step.data.application.command.memoryCheck = False
    self.step.data.application.command.silentMemoryCheck = True
    # self.step.data.application.configuration.pickledarguments.globalTag/globalTagTransaction
    if eventsPerLumi:
        self.step.data.application.configuration.eventsPerLumi = eventsPerLumi
    if maxRuntime:
        self.step.data.application.configuration.maxSecondsUntilRampdown = maxRuntime
    self.step.data.section_("input")

    self.job = jobDict(lheInputFiles, seeding)
    self.job["input_files"] = []
    for inputF in inputFiles:
        if isinstance(inputF, basestring):
            self.job["input_files"].append({"lfn": inputF, "parents": ""})
        else:
            self.job["input_files"].append(inputF)
    self.job['mask'] = Mask()
    self.job['mask']["FirstEvent"] = firstEvent
    self.job['mask']["LastEvent"] = lastEvent
    self.job['mask']["FirstRun"] = firstRun
    self.job['mask']["FirstLumi"] = firstLumi
    self.job['mask']["runAndLumis"] = runAndLumis
    self.job['agentNumber'] = agentNumber
    self.job['counter'] = 0
def testBuildPset(self):
    """
    _testBuildPset_

    Verify that multicore parameters are set in the PSet.
    """
    from WMCore.WMRuntime.Scripts.SetupCMSSWMulticore import SetupCMSSWMulticore
    setupScript = SetupCMSSWMulticore()
    setupScript.step = self.createTestStep()
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = self.testDir
    setupScript.files = {'file1': {'events': 1000}}
    setupScript.buildPSet()

    fixedPSet = self.loadProcessFromPSet()
    self.assertTrue(int(fixedPSet.options.multiProcesses.maxChildProcesses.value) > 0)
    self.assertTrue(int(fixedPSet.options.multiProcesses.maxSequentialEventsPerChild.value) > 0)
class Aggregator(object):
    """
    _Aggregator_

    Util to aggregate performance reports for multicore jobs into a single
    performance report, including a multicore section to allow profiling of
    internal performance
    """

    def __init__(self):
        self.numCores = 0
        self.sections = {}
        self.values = {}
        self.report = ConfigSection("performance")
        #
        # populate the aggregator with the list of expected keys
        # based on the functions map above
        # create a combined performance report with the appropriate sections
        for red in AggrFunctions.keys():
            self.values[red] = []
            sect, param = red.split(".")
            if sect not in self.sections:
                self.sections[sect] = []
                self.report.section_(sect)
            if param not in self.sections[sect]:
                self.sections[sect].append(param)

    def add(self, perfRep):
        """
        _add_

        Add the contents of the given performance report to this aggregator
        """
        self.numCores += 1
        for sect in self.sections.keys():
            for param in self.sections[sect]:
                key = ".".join([sect, param])
                try:
                    # protect against weird cases like NaNs in the reports
                    value = float(getSectParam(perfRep, sect, param))
                except AttributeError:
                    # protect against missing parameters
                    continue
                except ValueError:
                    continue
                self.values[key].append(value)

    def aggregate(self):
        """
        _aggregate_

        For each key in the map, run the appropriate aggregation function on it
        """
        for key, vals in self.values.items():
            # avoid divide by zero averages etc
            if len(vals) == 0:
                continue
            aggFunc = AggrFunctions[key]
            sect, param = key.split(".")
            section = getattr(self.report, sect)
            setattr(section, param, aggFunc(vals))
        self.createMulticoreSection()
        return self.report

    def createMulticoreSection(self):
        """
        _createMulticoreSection_

        create the multicore report section
        """
        self.report.section_("multicore")
        self.report.multicore.coresUsed = self.numCores
        if "cpu.TotalJobTime" in self.values:
            vals = self.values["cpu.TotalJobTime"]
            self.report.multicore.summedProcessTime = sum(vals)
            self.report.multicore.averageProcessTime = average(vals)
            self.report.multicore.maxProcessTime = max(vals)
            self.report.multicore.minProcessTime = min(vals)
            self.report.multicore.processWaitingTime = max(vals) - min(vals)
            stepEffNom = float(sum(vals)) / float(max(vals) * self.numCores)
            stepEffDenom = float(average(vals)) / float(max(vals))
            stepEff = stepEffNom / stepEffDenom
            self.report.multicore.stepEfficiency = stepEff
        # frame in the merge report values
        # need to be set from the MulticoreCMSSW Executor
        self.report.multicore.mergeStartTime = None
        self.report.multicore.mergeEndTime = None
        self.report.multicore.numberOfMerges = None
        self.report.multicore.totalStepTime = None
        self.report.multicore.averageMergeTime = None
        self.report.multicore.maxMergeTime = None
        self.report.multicore.minMergeTime = None
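A hedged usage sketch of the class above: one add() call per per-core report, then a single aggregate(). It assumes each perfRep exposes the section/parameter pairs listed in AggrFunctions (e.g. cpu.TotalJobTime); the report list is hypothetical:

# Hypothetical example: combine the performance reports of a multicore job.
agg = Aggregator()
for perfRep in perCorePerformanceReports:  # one performance report per core
    agg.add(perfRep)
combined = agg.aggregate()                 # a ConfigSection named "performance"
print(combined.multicore.coresUsed, combined.multicore.summedProcessTime)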
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__MergeSample__

Example of a report from a merge job

Created on Fri Jun 8 13:22:30 2012

@author: dballest
"""

from WMCore.Configuration import ConfigSection
from WMCore.FwkJobReport.Report import Report

FrameworkJobReport = ConfigSection("FrameworkJobReport")
FrameworkJobReport.task = '/Run195376-MuEG-Run2012B-PromptReco-v1-MuEG/DataProcessing/DataProcessingMergeSKIMStreamLogError'
FrameworkJobReport.workload = 'Unknown'
FrameworkJobReport.section_('cmsRun1')
FrameworkJobReport.cmsRun1.status = 0
FrameworkJobReport.cmsRun1.counter = 1
FrameworkJobReport.cmsRun1.section_('cleanup')
FrameworkJobReport.cmsRun1.cleanup.section_('unremoved')
FrameworkJobReport.cmsRun1.cleanup.section_('removed')
FrameworkJobReport.cmsRun1.cleanup.removed.fileCount = 0
FrameworkJobReport.cmsRun1.section_('errors')
FrameworkJobReport.cmsRun1.section_('logs')
FrameworkJobReport.cmsRun1.section_('parameters')
FrameworkJobReport.cmsRun1.parameters.GeneratorInfo = ''
FrameworkJobReport.cmsRun1.parameters.ReadBranches = ''
FrameworkJobReport.cmsRun1.outputModules = ['Merged']
FrameworkJobReport.cmsRun1.stopTime = 1338808530.44
def testPileupSetup(self):
    """
    Test the pileup setting.

    reference (setupScript.process instance):
    in test/python/WMCore_t/WMRuntime_t/Scripts_t/WMTaskSpace/cmsRun1/PSet.py
    """
    try:
        from dbs.apis.dbsClient import DbsApi
    except ImportError as ex:
        raise unittest.SkipTest

    # this is a modified and shortened version of
    # WMCore/test/python/WMCore_t/Misc_t/site-local-config.xml
    # since the dataset name in question (below) is only present at
    # storm-fe-cms.cr.cnaf.infn.it, need to make the test think it's its local SE
    siteLocalConfigContent = """
<site-local-config>
    <site name="-SOME-SITE-NAME-">
        <event-data>
            <catalog url="trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap"/>
        </event-data>
        <local-stage-out>
            <!-- original cmssrm.fnal.gov -->
            <phedex-node value="T2_CH_CERN"/>
            <command value="test-copy"/>
            <catalog url="trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap"/>
        </local-stage-out>
        <calib-data>
            <frontier-connect>
                <load balance="proxies"/>
                <proxy url="http://cmsfrontier1.fnal.gov:3128"/>
                <proxy url="http://cmsfrontier2.fnal.gov:3128"/>
            </frontier-connect>
        </calib-data>
    </site>
</site-local-config>
"""
    siteLocalConfig = os.path.join(self.testDir, "test-site-local-config.xml")
    f = open(siteLocalConfig, 'w')
    f.write(siteLocalConfigContent)
    f.close()

    from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset
    setupScript = SetupCMSSWPset()
    setupScript.step = self.createTestStep()
    setupScript.stepSpace = ConfigSection(name="stepSpace")
    setupScript.stepSpace.location = os.path.join(self.testDir, "cmsRun1")
    setupScript.job = self.createTestJob()

    # define pileup configuration
    # although the implementation considers any type of pileup, only the
    # "data" and "mc" types are eventually taken into account and lead to
    # modifications of the job input files
    pileupConfig = {
        "data": ["/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO"],
        "mc": ["/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO"]
    }
    dbsUrl = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader"
    setupScript.step.setupPileup(pileupConfig, dbsUrl)

    # SetupCMSSWPset pileup handling will be consulting SiteLocalConfig
    # to determine the StorageElement (SE) name the job is running on.
    # SiteLocalConfig loads the site-local-config.xml file from the location
    # defined by an env. variable; if the variable is not defined already, set it.
    # Obviously, if "WMAGENT_SITE_CONFIG_OVERRIDE" is already set here, the
    # above trick with the SE name is not effective.
    if not os.getenv("WMAGENT_SITE_CONFIG_OVERRIDE", None):
        os.environ["WMAGENT_SITE_CONFIG_OVERRIDE"] = siteLocalConfig

    # find out the local site name from the testing local site config,
    # will be needed later
    siteConfig = loadSiteLocalConfig()
    seLocalName = siteConfig.localStageOut["phedex-node"]
    print("Running on site '%s', local SE name: '%s'" % (siteConfig.siteName, seLocalName))

    # before calling the script, SetupCMSSWPset will try to load the JSON
    # pileup configuration file, need to create it in self.testDir
    fetcher = PileupFetcher()
    fetcher.setWorkingDirectory(self.testDir)
    fetcher.createPileupConfigFile(setupScript.step)

    setupScript()

    # now test all modifications carried out in SetupCMSSWPset.__call__,
    # which will also test that CMSSWStepHelper.setupPileup ran correctly
    mixModules, dataMixModules = setupScript._getPileupMixingModules()

    # load in the pileup configuration in the form of a dict which
    # PileupFetcher previously saved in a JSON file
    pileupDict = setupScript._getPileupConfigFromJson()

    # get the sub-dict for the particular pileup type
    # for the pileupDict structure description - see PileupFetcher._queryDbsAndGetPileupConfig
    for pileupType, modules in zip(("data", "mc"), (dataMixModules, mixModules)):
        # a KeyError here means the above pileupConfig is not correct - it
        # needs to define both of these pileup types
        d = pileupDict[pileupType]
        self._mixingModulesInputFilesTest(modules, d, seLocalName)
def getBaggage(self):
    confSect = ConfigSection()
    confSect.seeding = self.seeding
    confSect.lheInputFiles = self.lheInputFiles
    return confSect
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__ErrorSample__

Example of a report from a job that failed

Created on Fri Jun 8 13:22:11 2012

@author: dballest
"""

from WMCore.Configuration import ConfigSection
from WMCore.FwkJobReport.Report import Report

FrameworkJobReport = ConfigSection("FrameworkJobReport")
FrameworkJobReport.task = '/Run195530-PhotonHad-Run2012B-PromptReco-v1-PhotonHad/DataProcessing'
FrameworkJobReport.workload = 'Unknown'
FrameworkJobReport.section_('cmsRun1')
FrameworkJobReport.cmsRun1.status = 1
FrameworkJobReport.cmsRun1.counter = 1
FrameworkJobReport.cmsRun1.section_('errors')
FrameworkJobReport.cmsRun1.errors.section_('error0')
FrameworkJobReport.cmsRun1.errors.error0.type = 'CMSSWStepFailure'
FrameworkJobReport.cmsRun1.errors.error0.details = ''
FrameworkJobReport.cmsRun1.errors.error0.exitCode = 84
FrameworkJobReport.cmsRun1.errors.section_('error1')
FrameworkJobReport.cmsRun1.errors.error1.type = 'Fatal Exception'
FrameworkJobReport.cmsRun1.errors.error1.details = ''
FrameworkJobReport.cmsRun1.errors.error1.exitCode = '8020'
FrameworkJobReport.cmsRun1.errors.section_('error2')
FrameworkJobReport.cmsRun1.errors.error2.type = 'ErrorLoggingAddition'
class Report(object):
    """
    The base class for the new jobReport
    """

    def __init__(self, reportname=None):
        self.data = ConfigSection("FrameworkJobReport")
        self.data.steps = []
        self.data.workload = "Unknown"
        self.report = None
        self.reportname = ""

        if reportname:
            self.addStep(reportname=reportname)
        return

    def __str__(self):
        return str(self.data)

    def listSteps(self):
        """
        _listSteps_

        List the names of all the steps in the report.
        """
        return self.data.steps

    def setStepStatus(self, stepName, status):
        """
        _setStepStatus_

        Set the status for a step.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.status = status
        return

    def parse(self, xmlfile, stepName="cmsRun1"):
        """
        _parse_

        Read in the FrameworkJobReport XML file produced by cmsRun and pull
        the information from it into this object.
        """
        from WMCore.FwkJobReport.XMLParser import xmlToJobReport
        try:
            xmlToJobReport(self, xmlfile)
        except Exception as ex:
            msg = "Error reading XML job report file, possibly corrupt XML File:\n"
            msg += "Details: %s" % str(ex)
            crashMessage = "\nStacktrace:\n"
            stackTrace = traceback.format_tb(sys.exc_info()[2], None)
            for stackFrame in stackTrace:
                crashMessage += stackFrame
            logging.debug(crashMessage)
            raise FwkJobReportException(msg)

    @staticmethod
    def jsonizeFiles(reportModule):
        """
        _jsonizeFiles_

        Put individual files in JSON format.
        """
        jsonFiles = []
        files = getattr(reportModule, "files", None)
        if not files:
            return jsonFiles

        fileCount = getattr(reportModule.files, "fileCount", 0)
        for i in range(fileCount):
            reportFile = getattr(reportModule.files, "file%s" % i)
            jsonFile = reportFile.dictionary_()
            if jsonFile.get('runs', None):
                cfgSectionRuns = jsonFile["runs"]
                jsonFile["runs"] = {}
                for runNumber in cfgSectionRuns.listSections_():
                    jsonFile["runs"][str(runNumber)] = getattr(cfgSectionRuns, runNumber)
            jsonFiles.append(jsonFile)
        return jsonFiles

    @staticmethod
    def jsonizePerformance(perfSection):
        """
        _jsonizePerformance_

        Convert the performance section of the FWJR into JSON.
        """
        jsonPerformance = {}
        for reportSection in ["storage", "memory", "cpu", "multicore"]:
            jsonPerformance[reportSection] = {}
            if not hasattr(perfSection, reportSection):
                continue
            jsonPerformance[reportSection] = getattr(perfSection, reportSection).dictionary_()
            for key in jsonPerformance[reportSection]:
                val = jsonPerformance[reportSection][key]
                if isinstance(val, float):
                    if math.isinf(val) or math.isnan(val):
                        jsonPerformance[reportSection][key] = None
        return jsonPerformance

    def __to_json__(self, thunker):
        """
        __to_json__

        Create a JSON version of the Report.
        """
        jsonReport = {}
        jsonReport["WorkerNodeInfo"] = self.getWorkerNodeInfo()
        jsonReport["task"] = self.getTaskName()
        jsonReport["steps"] = {}
        jsonReport["skippedFiles"] = self.getAllSkippedFiles()
        jsonReport["fallbackFiles"] = self.getAllFallbackFiles()
        jsonReport["Campaign"] = self.getCampaign()
        jsonReport["PrepID"] = self.getPrepID()
        jsonReport["EOSLogURL"] = self.getLogURL()

        for stepName in self.listSteps():
            reportStep = self.retrieveStep(stepName)
            jsonStep = {}
            jsonStep["status"] = reportStep.status

            stepTimes = self.getTimes(stepName)
            if stepTimes["startTime"] is not None:
                stepTimes["startTime"] = int(stepTimes["startTime"])
            if stepTimes["stopTime"] is not None:
                stepTimes["stopTime"] = int(stepTimes["stopTime"])
            jsonStep["start"] = stepTimes["startTime"]
            jsonStep["stop"] = stepTimes["stopTime"]

            jsonStep["performance"] = self.jsonizePerformance(reportStep.performance)

            jsonStep["output"] = {}
            for outputModule in reportStep.outputModules:
                reportOutputModule = getattr(reportStep.output, outputModule)
                jsonStep["output"][outputModule] = self.jsonizeFiles(reportOutputModule)

            analysisSection = getattr(reportStep, 'analysis', None)
            if analysisSection:
                jsonStep["output"]['analysis'] = self.jsonizeFiles(analysisSection)

            jsonStep["input"] = {}
            for inputSource in reportStep.input.listSections_():
                reportInputSource = getattr(reportStep.input, inputSource)
                jsonStep["input"][inputSource] = self.jsonizeFiles(reportInputSource)

            jsonStep["errors"] = []
            errorCount = getattr(reportStep.errors, "errorCount", 0)
            for i in range(errorCount):
                reportError = getattr(reportStep.errors, "error%i" % i)
                jsonStep["errors"].append({"type": reportError.type,
                                           "details": reportError.details,
                                           "exitCode": reportError.exitCode})

            jsonStep["cleanup"] = {}
            jsonStep["parameters"] = {}
            jsonStep["site"] = self.getSiteName()
            jsonStep["analysis"] = {}
            jsonStep["logs"] = {}
            jsonReport["steps"][stepName] = jsonStep

        return jsonReport

    def getSiteName(self):
        """
        _getSiteName_

        Returns the site name attribute (not step specific)
        """
        return getattr(self.data, 'siteName', {})

    def _setSiteName(self, site):
        """
        _setSiteName_

        Set the site name attribute (not step specific)
        """
        setattr(self.data, 'siteName', site)
        return

    def getExitCodes(self):
        """
        _getExitCodes_

        Return a set of all non-zero exit codes in the report
        """
        returnCodes = set()
        for stepName in self.listSteps():
            returnCodes.update(self.getStepExitCodes(stepName=stepName))
        return returnCodes

    def getStepExitCodes(self, stepName):
        """
        _getStepExitCodes_

        Returns a set of all non-zero exit codes in the step
        """
        returnCodes = set()
        reportStep = self.retrieveStep(stepName)
        errorCount = getattr(reportStep.errors, "errorCount", 0)
        for i in range(errorCount):
            reportError = getattr(reportStep.errors, "error%i" % i)
            if getattr(reportError, 'exitCode', None):
                returnCodes.add(int(reportError.exitCode))
            else:
                # exitCode is likely set to None(?!?)
                returnCodes.add(99999)
        return returnCodes

    def getExitCode(self):
        """
        _getExitCode_

        Return the first exit code you find.
        """
        returnCode = 0
        for stepName in self.listSteps():
            errorCode = self.getStepExitCode(stepName=stepName)
            if errorCode == 99999:
                # Then we don't know what this error was
                # Mark it for return only if we don't find an
                # actual error code in the job.
                returnCode = errorCode
            elif errorCode != 0:
                return errorCode
        return returnCode

    def getStepExitCode(self, stepName):
        """
        _getStepExitCode_

        Get the exit code for a particular step
        Return 0 if none
        """
        returnCode, _ = self.getStepExitCodeAndMessage(stepName)
        return returnCode

    def getStepExitCodeAndMessage(self, stepName):
        """
        _getStepExitCodeAndMessage_

        Get the exit code and message for a particular step
        Return (0, None) if there were no errors.
        """
        returnCode = 0
        returnMessage = None
        reportStep = self.retrieveStep(stepName)
        errorCount = getattr(reportStep.errors, "errorCount", 0)
        for i in range(errorCount):
            reportError = getattr(reportStep.errors, "error%i" % i)
            if not getattr(reportError, 'exitCode', None):
                returnCode = 99999
                returnMessage = 'Unknown'
            else:
                return int(reportError.exitCode), reportError.details
        return returnCode, returnMessage

    def persist(self, filename):
        """
        _persist_

        Pickle this object and save it to disk.
        """
        if PY3:
            with open(filename, 'wb') as handle:
                pickle.dump(encodeUnicodeToBytes(self.data), handle)
        else:
            with open(filename, 'w') as handle:
                pickle.dump(self.data, handle)
        return

    def unpersist(self, filename, reportname=None):
        """
        _unpersist_

        Load a pickled FWJR from disk.
        """
        if PY3:
            with open(filename, 'rb') as handle:
                self.data = decodeBytesToUnicode(pickle.load(handle))
        else:
            with open(filename, 'r') as handle:
                self.data = pickle.load(handle)

        # old self.report (if it existed) became unattached
        if reportname:
            self.report = getattr(self.data, reportname)
        return

    def addOutputModule(self, moduleName):
        """
        _addOutputModule_

        Add an entry for an output module.
        """
        self.report.outputModules.append(moduleName)
        self.report.output.section_(moduleName)
        outMod = getattr(self.report.output, moduleName)
        outMod.section_("files")
        outMod.section_("dataset")
        outMod.files.fileCount = 0
        return outMod

    def killOutput(self):
        """
        _killOutput_

        Remove all the output from the report.  This is useful for chained
        processing where we don't want to keep the output from a particular
        step in a job.
        """
        for outputModuleName in self.report.outputModules:
            delattr(self.report.output, outputModuleName)
        self.report.outputModules = []
        return

    def addOutputFile(self, outputModule, aFile=None):
        """
        _addOutputFile_

        Add an output file to the outputModule provided.
        """
        logging.info("addOutputFile method called with outputModule: %s, aFile: %s",
                     outputModule, aFile)
        aFile = aFile or {}

        # Now load the output module and create the file object
        outMod = getattr(self.report.output, outputModule, None)
        if outMod is None:
            outMod = self.addOutputModule(outputModule)
        count = outMod.files.fileCount
        fileSection = "file%s" % count
        outMod.files.section_(fileSection)
        fileRef = getattr(outMod.files, fileSection)
        logging.info("addOutputFile method fileRef: %s, whole tree: %s",
                     fileRef, fileRef.dictionary_whole_tree_())
        outMod.files.fileCount += 1

        # Now we need to eliminate the optional and non-primitives:
        # runs, parents, branches, locations and datasets
        keyList = list(aFile)

        fileRef.section_("runs")
        if "runs" in aFile:
            for run in aFile["runs"]:
                addRunInfoToFile(fileRef, run)
            keyList.remove('runs')

        if "parents" in aFile:
            setattr(fileRef, 'parents', list(aFile['parents']))
            keyList.remove('parents')

        if "locations" in aFile:
            fileRef.location = list(aFile["locations"])
            keyList.remove('locations')
        elif "PNN" in aFile:
            fileRef.location = [aFile["PNN"]]

        if "LFN" in aFile:
            fileRef.lfn = aFile["LFN"]
            keyList.remove("LFN")
        if "PFN" in aFile:
            fileRef.lfn = aFile["PFN"]
            keyList.remove("PFN")

        # All right, the rest should be JSONalizable python primitives
        for entry in keyList:
            setattr(fileRef, entry, aFile[entry])

        # And we're done
        return fileRef

    def addInputSource(self, sourceName):
        """
        _addInputSource_

        Add an input source to the report, doing nothing if the input source
        already exists.
        """
        if hasattr(self.report.input, sourceName):
            return getattr(self.report.input, sourceName)

        self.report.input.section_(sourceName)
        srcMod = getattr(self.report.input, sourceName)
        srcMod.section_("files")
        srcMod.files.fileCount = 0
        return srcMod

    def addInputFile(self, sourceName, **attrs):
        """
        _addInputFile_

        Add an input file to the given source.
        """
        srcMod = getattr(self.report.input, sourceName, None)
        if srcMod is None:
            srcMod = self.addInputSource(sourceName)
        count = srcMod.files.fileCount
        fileSection = "file%s" % count
        srcMod.files.section_(fileSection)
        fileRef = getattr(srcMod.files, fileSection)
        srcMod.files.fileCount += 1

        keyList = list(attrs)

        fileRef.section_("runs")
        if "runs" in attrs:
            for run in attrs["runs"]:
                addRunInfoToFile(fileRef, run)
            keyList.remove('runs')

        if "parents" in attrs:
            keyList.remove('parents')
        if "locations" in attrs:
            keyList.remove('locations')

        # All right, the rest should be JSONalizable python primitives
        for entry in keyList:
            setattr(fileRef, entry, attrs[entry])

        return fileRef

    def addAnalysisFile(self, filename, **attrs):
        """
        _addAnalysisFile_

        Add an Analysis File.
        """
        analysisFiles = self.report.analysis.files
        count = analysisFiles.fileCount
        label = "file%s" % count

        analysisFiles.section_(label)
        newFile = getattr(analysisFiles, label)
        newFile.fileName = filename

        for x, y in viewitems(attrs):
            setattr(newFile, x, y)

        analysisFiles.fileCount += 1
        return

    def addRemovedCleanupFile(self, **attrs):
        """
        _addRemovedCleanupFile_

        Add a file to the cleanup.removed file
        """
        removedFiles = self.report.cleanup.removed
        count = self.report.cleanup.removed.fileCount
        label = 'file%s' % count

        removedFiles.section_(label)
        newFile = getattr(removedFiles, label)

        for x, y in viewitems(attrs):
            setattr(newFile, x, y)

        self.report.cleanup.removed.fileCount += 1
        return

    def addError(self, stepName, exitCode, errorType, errorDetails, siteName=None):
        """
        _addError_

        Add an error report with an exitCode, type/class of error and details
        of the error as a string.
        Also, report the attempted site if the error happened before landing on it.
        """
        if self.retrieveStep(stepName) is None:
            # Create a step and set it to failed
            # Assumption: Adding an error fails a step
            self.addStep(stepName, status=1)

        if exitCode is not None:
            exitCode = int(exitCode)

        setExitCodes = self.getStepExitCodes(stepName)
        if exitCode in setExitCodes:
            logging.warning("Exit code: %s has been already added to the job report", exitCode)
            return

        stepSection = self.retrieveStep(stepName)

        errorCount = getattr(stepSection.errors, "errorCount", 0)
        errEntry = "error%s" % errorCount
        stepSection.errors.section_(errEntry)
        errDetails = getattr(stepSection.errors, errEntry)
        errDetails.exitCode = exitCode
        errDetails.type = str(errorType)
        try:
            if isinstance(errorDetails, newstr):
                errDetails.details = errorDetails
            elif isinstance(errorDetails, bytes):
                errDetails.details = decodeBytesToUnicode(errorDetails, 'ignore')
            else:
                errDetails.details = newstr(errorDetails)
        except UnicodeEncodeError as ex:
            msg = "Failed to encode the job error details for job ID: %s." % self.getJobID()
            msg += "\nException message: %s\nOriginal error details: %s" % (str(ex), errorDetails)
            logging.error(msg)
            msg = "DEFAULT ERROR MESSAGE, because it failed to UTF-8 encode the original message."
            errDetails.details = msg
        except UnicodeDecodeError as ex:
            msg = "Failed to decode the job error details for job ID: %s." % self.getJobID()
            msg += "\nException message: %s\nOriginal error details: %s" % (str(ex), errorDetails)
            logging.error(msg)
            msg = "DEFAULT ERROR MESSAGE, because it failed to UTF-8 decode the original message."
            errDetails.details = msg

        setattr(stepSection.errors, "errorCount", errorCount + 1)
        self.setStepStatus(stepName=stepName, status=exitCode)
        if siteName:
            self._setSiteName(site=siteName)
        return

    def addSkippedFile(self, lfn, pfn):
        """
        _addSkippedFile_

        Report a skipped input file
        """
        count = self.report.skipped.files.fileCount
        entry = "file%s" % count
        self.report.skipped.files.section_(entry)
        skipSect = getattr(self.report.skipped.files, entry)
        skipSect.PhysicalFileName = pfn
        skipSect.LogicalFileName = lfn
        self.report.skipped.files.fileCount += 1
        return

    def addFallbackFile(self, lfn, pfn):
        """
        _addFallbackFile_

        Report a fallback attempt for an input file
        """
        count = self.report.fallback.files.fileCount
        entry = "file%s" % count
        self.report.fallback.files.section_(entry)
        fallbackSect = getattr(self.report.fallback.files, entry)
        fallbackSect.PhysicalFileName = pfn
        fallbackSect.LogicalFileName = lfn
        self.report.fallback.files.fileCount += 1
        return

    def addSkippedEvent(self, run, event):
        """
        _addSkippedEvent_

        Add a skipped event.
        """
        self.report.skipped.events.section_(str(run))
        runsect = getattr(self.report.skipped.events, str(run))
        if not hasattr(runsect, "eventList"):
            runsect.eventList = []
        runsect.eventList.append(event)
        return

    def addStep(self, reportname, status=1):
        """
        _addStep_

        This creates a report section into self.report
        """
        if hasattr(self.data, reportname):
            msg = "Attempted to create pre-existing report section %s" % reportname
            logging.error(msg)
            return

        self.data.steps.append(reportname)

        self.reportname = reportname
        self.data.section_(reportname)
        self.report = getattr(self.data, reportname)
        self.report.id = None
        self.report.status = status
        self.report.outputModules = []

        # structure
        self.report.section_("site")
        self.report.section_("output")
        self.report.section_("input")
        self.report.section_("performance")
        self.report.section_("analysis")
        self.report.section_("errors")
        self.report.section_("skipped")
        self.report.section_("fallback")
        self.report.section_("parameters")
        self.report.section_("logs")
        self.report.section_("cleanup")
        self.report.analysis.section_("files")
        self.report.cleanup.section_("removed")
        self.report.cleanup.section_("unremoved")
        self.report.skipped.section_("events")
        self.report.skipped.section_("files")
        self.report.fallback.section_("files")
        self.report.skipped.files.fileCount = 0
        self.report.fallback.files.fileCount = 0
        self.report.analysis.files.fileCount = 0
        self.report.cleanup.removed.fileCount = 0
        return

    def setStep(self, stepName, stepSection):
        """
        _setStep_
        """
        if stepName not in self.listSteps():
            self.data.steps.append(stepName)
        else:
            logging.info("Step %s is now being overridden by a new step report", stepName)
        self.data.section_(stepName)
        setattr(self.data, stepName, stepSection)
        return

    def retrieveStep(self, step):
        """
        _retrieveStep_

        Grabs a report in the raw and returns it.
        """
        reportSection = getattr(self.data, step, None)
        return reportSection

    def load(self, filename):
        """
        _load_

        This just maps to unpersist
        """
        self.unpersist(filename)
        return

    def save(self, filename):
        """
        _save_

        This just maps to persist
        """
        self.persist(filename)
        return

    def getOutputModule(self, step, outputModule):
        """
        _getOutputModule_

        Get the output module from a particular step
        """
        stepReport = self.retrieveStep(step=step)
        if not stepReport:
            return None
        return getattr(stepReport.output, outputModule, None)

    def getOutputFile(self, fileName, outputModule, step):
        """
        _getOutputFile_

        Takes a fileRef object and returns a DataStructs/File object as output
        """
        outputMod = self.getOutputModule(step=step, outputModule=outputModule)
        if not outputMod:
            return None

        fileRef = getattr(outputMod.files, fileName, None)
        newFile = File(locations=set())

        # Locations
        newFile.setLocation(getattr(fileRef, "location", None))

        # Runs
        runList = fileRef.runs.listSections_()
        for run in runList:
            lumis = getattr(fileRef.runs, run)
            if isinstance(lumis, dict):
                newRun = Run(int(run), *listitems(lumis))
            else:
                newRun = Run(int(run), *lumis)
            newFile.addRun(newRun)

        newFile["lfn"] = getattr(fileRef, "lfn", None)
        newFile["pfn"] = getattr(fileRef, "pfn", None)
        newFile["events"] = int(getattr(fileRef, "events", 0))
        newFile["size"] = int(getattr(fileRef, "size", 0))
        newFile["branches"] = getattr(fileRef, "branches", [])
        newFile["input"] = getattr(fileRef, "input", [])
        newFile["inputpfns"] = getattr(fileRef, "inputpfns", [])
        newFile["branch_hash"] = getattr(fileRef, "branch_hash", None)
        newFile["catalog"] = getattr(fileRef, "catalog", "")
        newFile["guid"] = getattr(fileRef, "guid", "")
        newFile["module_label"] = getattr(fileRef, "module_label", "")
        newFile["checksums"] = getattr(fileRef, "checksums", {})
        newFile["merged"] = bool(getattr(fileRef, "merged", False))
        newFile["dataset"] = getattr(fileRef, "dataset", {})
        newFile["acquisitionEra"] = getattr(fileRef, 'acquisitionEra', None)
        newFile["processingVer"] = getattr(fileRef, 'processingVer', None)
        newFile["validStatus"] = getattr(fileRef, 'validStatus', None)
        newFile["globalTag"] = getattr(fileRef, 'globalTag', None)
        newFile["prep_id"] = getattr(fileRef, 'prep_id', None)
        newFile['configURL'] = getattr(fileRef, 'configURL', None)
        newFile['inputPath'] = getattr(fileRef, 'inputPath', None)
        newFile["outputModule"] = outputModule
        newFile["fileRef"] = fileRef

        return newFile

    def getAllFilesFromStep(self, step):
        """
        _getAllFilesFromStep_

        For a given step, retrieve all the associated files
        """
        stepReport = self.retrieveStep(step=step)
        if not stepReport:
            logging.debug("Asked to retrieve files from non-existent step %s", step)
            return []

        # steps with no outputModules can be ok (even for CMSSW steps)
        listOfModules = getattr(stepReport, 'outputModules', None)
        if not listOfModules:
            return []

        listOfFiles = []
        for module in listOfModules:
            listOfFiles.extend(self.getFilesFromOutputModule(step=step, outputModule=module))

        return listOfFiles

    def getAllFiles(self):
        """
        _getAllFiles_

        Grabs all files in all output modules in all steps
        """
        listOfFiles = []
        for step in self.listSteps():
            listOfFiles.extend(self.getAllFilesFromStep(step=step))
        return listOfFiles

    def getAllInputFiles(self):
        """
        _getAllInputFiles_

        Gets all the input files
        """
        listOfFiles = []
        for step in self.listSteps():
            tmp = self.getInputFilesFromStep(stepName=step)
            if tmp:
                listOfFiles.extend(tmp)
        return listOfFiles

    def getInputFilesFromStep(self, stepName, inputSource=None):
        """
        _getInputFilesFromStep_

        Retrieve a list of input files from the given step.
        """
        step = self.retrieveStep(stepName)

        if inputSource is None:
            inputSources = step.input.listSections_()
        else:
            inputSources = [inputSource]

        inputFiles = []
        for inputSource in inputSources:
            source = getattr(step.input, inputSource)
            for fileNum in range(source.files.fileCount):
                fwjrFile = getattr(source.files, "file%d" % fileNum)

                lfn = getattr(fwjrFile, "lfn", None)
                pfn = getattr(fwjrFile, "pfn", None)
                size = getattr(fwjrFile, "size", 0)
                events = getattr(fwjrFile, "events", 0)
                branches = getattr(fwjrFile, "branches", [])
                catalog = getattr(fwjrFile, "catalog", None)
                guid = getattr(fwjrFile, "guid", None)
                inputSourceClass = getattr(fwjrFile, "input_source_class", None)
                moduleLabel = getattr(fwjrFile, "module_label", None)
                inputType = getattr(fwjrFile, "input_type", None)

                inputFile = File(lfn=lfn, size=size, events=events)
                inputFile["pfn"] = pfn
                inputFile["branches"] = branches
                inputFile["catalog"] = catalog
                inputFile["guid"] = guid
                inputFile["input_source_class"] = inputSourceClass
                inputFile["module_label"] = moduleLabel
                inputFile["input_type"] = inputType

                runSection = getattr(fwjrFile, "runs")
                runNumbers = runSection.listSections_()
                for runNumber in runNumbers:
                    lumiTuple = getattr(runSection, str(runNumber))
                    inputFile.addRun(Run(int(runNumber), *lumiTuple))

                inputFiles.append(inputFile)

        return inputFiles

    def getFilesFromOutputModule(self, step, outputModule):
        """
        _getFilesFromOutputModule_

        Grab all the files in a particular output module
        """
        outputMod = self.getOutputModule(step=step, outputModule=outputModule)
        if not outputMod:
            return []

        listOfFiles = []
        for n in range(outputMod.files.fileCount):
            aFile = self.getOutputFile(fileName='file%i' % (n),
                                       outputModule=outputModule, step=step)
            if aFile:
                listOfFiles.append(aFile)
            else:
                msg = "Could not find file%i in module" % (n)
                logging.error(msg)
                return []

        return listOfFiles

    def getAllSkippedFiles(self):
        """
        _getAllSkippedFiles_

        Get a list of LFNs for all the input files listed as skipped on the
        report.
        """
        listOfFiles = []
        for step in self.listSteps():
            tmp = self.getSkippedFilesFromStep(stepName=step)
            if tmp:
                listOfFiles.extend(tmp)
        return listOfFiles

    def getAllFallbackFiles(self):
        """
        _getAllFallbackFiles_

        Get a list of LFNs for all the input files listed as fallback
        attempts on the report
        """
        listOfFiles = []
        for step in self.listSteps():
            tmp = self.getFallbackFilesFromStep(stepName=step)
            if tmp:
                listOfFiles.extend(tmp)
        return listOfFiles

    def getSkippedFilesFromStep(self, stepName):
        """
        _getSkippedFilesFromStep_

        Get a list of LFNs skipped in the given step
        """
        skippedFiles = []
        step = self.retrieveStep(stepName)
        filesSection = step.skipped.files
        fileCount = getattr(filesSection, "fileCount", 0)
        for fileNum in range(fileCount):
            fileSection = getattr(filesSection, "file%d" % fileNum)
            lfn = getattr(fileSection, "LogicalFileName", None)
            if lfn is not None:
                skippedFiles.append(lfn)
            else:
                logging.error("Found no LFN in file %s", str(fileSection))
        return skippedFiles

    def getFallbackFilesFromStep(self, stepName):
        """
        _getFallbackFilesFromStep_

        Get a list of LFNs which triggered a fallback in the given step
        """
        fallbackFiles = []
        step = self.retrieveStep(stepName)
        try:
            filesSection = step.fallback.files
        except AttributeError:
            return fallbackFiles

        fileCount = getattr(filesSection, "fileCount", 0)
        for fileNum in range(fileCount):
            fileSection = getattr(filesSection, "file%d" % fileNum)
            lfn = getattr(fileSection, "LogicalFileName", None)
            if lfn is not None:
                fallbackFiles.append(lfn)
            else:
                logging.error("Found no LFN in file %s", str(fileSection))
        return fallbackFiles

    def getStepErrors(self, stepName):
        """
        _getStepErrors_

        Get all errors for a given step
        """
        if self.retrieveStep(stepName) is None:
            # Create a step and set it to failed
            # Assumption: Adding an error fails a step
            self.addStep(stepName, status=1)
        stepSection = self.retrieveStep(stepName)
        errorCount = getattr(stepSection.errors, "errorCount", 0)
        if errorCount == 0:
            return {}
        else:
            return stepSection.errors.dictionary_()

    def stepSuccessful(self, stepName):
        """
        _stepSuccessful_

        Determine whether or not a step was successful.
        """
        stepReport = self.retrieveStep(step=stepName)
        status = getattr(stepReport, 'status', 1)
        # We have too many possibilities
        if status not in [0, '0', 'success', 'Success']:
            return False
        return True

    def taskSuccessful(self, ignoreString='logArch'):
        """
        _taskSuccessful_

        Return True if all steps successful, False otherwise
        """
        value = True

        if len(self.listSteps()) == 0:
            # Mark jobs as failed if they have no steps
            msg = "Could not find any steps"
            logging.error(msg)
            return False

        for stepName in self.listSteps():
            # Ignore specified steps
            # i.e., logArch steps can fail without causing
            # the task to fail
            if ignoreString and re.search(ignoreString, stepName):
                continue
            if not self.stepSuccessful(stepName=stepName):
                value = False

        return value

    def getAnalysisFilesFromStep(self, step):
        """
        _getAnalysisFilesFromStep_

        Retrieve a list of all the analysis files produced in a step.
        """
        stepReport = self.retrieveStep(step=step)

        if not stepReport or not hasattr(stepReport.analysis, 'files'):
            return []

        analysisFiles = stepReport.analysis.files
        results = []
        for fileNum in range(analysisFiles.fileCount):
            results.append(getattr(analysisFiles, "file%s" % fileNum))

        # filter out duplicates
        duplicateCheck = []
        filteredResults = []
        for result in results:
            inputtag = getattr(result, 'inputtag', None)
            if (result.fileName, inputtag) not in duplicateCheck:
                duplicateCheck.append((result.fileName, inputtag))
                filteredResults.append(result)

        return filteredResults

    def getAllFileRefsFromStep(self, step):
        """
        _getAllFileRefsFromStep_

        Retrieve a list of all files produced in a step.  The files will be
        in the form of references to the ConfigSection objects in the actual
        report.
        """
        stepReport = self.retrieveStep(step=step)
        if not stepReport:
            return []

        outputModules = getattr(stepReport, "outputModules", [])
        fileRefs = []
        for outputModule in outputModules:
            outputModuleRef = self.getOutputModule(step=step, outputModule=outputModule)
            for i in range(outputModuleRef.files.fileCount):
                fileRefs.append(getattr(outputModuleRef.files, "file%i" % i))

        analysisFiles = self.getAnalysisFilesFromStep(step)
        fileRefs.extend(analysisFiles)

        return fileRefs

    def addInfoToOutputFilesForStep(self, stepName, step):
        """
        _addInfoToOutputFilesForStep_

        Add the information missing from output files to the files.
        This requires the WMStep to be passed in.
        """
        stepReport = self.retrieveStep(step=stepName)
        fileInfo = FileInfo()
        if not stepReport:
            return

        listOfModules = getattr(stepReport, 'outputModules', None)
        for module in listOfModules:
            outputMod = getattr(stepReport.output, module, None)
            for n in range(outputMod.files.fileCount):
                aFile = getattr(outputMod.files, 'file%i' % n, None)
                if not aFile:
                    msg = "Could not find file%i in module" % n
                    logging.error(msg)
                    return

                fileInfo(fileReport=aFile, step=step, outputModule=module)
        return

    def deleteOutputModuleForStep(self, stepName, moduleName):
        """
        _deleteOutputModuleForStep_

        Delete any reference to the given output module in the step report,
        which includes deleting any output file it produced
        """
        stepReport = self.retrieveStep(step=stepName)
        if not stepReport:
            return

        listOfModules = getattr(stepReport, 'outputModules', [])
        if moduleName not in listOfModules:
            return

        delattr(stepReport.output, moduleName)
        listOfModules.remove(moduleName)
        return

    def setStepStartTime(self, stepName):
        """
        _setStepStartTime_

        Set the startTime for a step.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.startTime = time.time()
        return

    def setStepStopTime(self, stepName):
        """
        _setStepStopTime_

        Set the stopTime for a step.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.stopTime = time.time()
        return

    def getTimes(self, stepName):
        """
        _getTimes_

        Return a dictionary with the start and stop times
        """
        reportStep = self.retrieveStep(stepName)
        startTime = getattr(reportStep, 'startTime', None)
        stopTime = getattr(reportStep, 'stopTime', None)
        return {'startTime': startTime, 'stopTime': stopTime}

    def getFirstStartLastStop(self):
        """
        _getFirstStartLastStop_

        Get the first startTime, last stopTime
        """
        steps = self.listSteps()
        if len(steps) < 1:
            return None

        firstStep = self.getTimes(stepName=steps[0])
        startTime = firstStep['startTime']
        stopTime = firstStep['stopTime']

        for stepName in steps:
            timeStamps = self.getTimes(stepName=stepName)
            if timeStamps['startTime'] is None or timeStamps['stopTime'] is None:
                # Unusable times
                continue
            if startTime is None or startTime > timeStamps['startTime']:
                startTime = timeStamps['startTime']
            if stopTime is None or stopTime < timeStamps['stopTime']:
                stopTime = timeStamps['stopTime']

        return {'startTime': startTime, 'stopTime': stopTime}

    def setTaskName(self, taskName):
        """
        _setTaskName_

        Set the task name for the report
        """
        self.data.task = taskName
        return

    def getTaskName(self):
        """
        _getTaskName_

        Return the task name
        """
        return getattr(self.data, 'task', None)

    def setJobID(self, jobID):
        """
        _setJobID_

        Set the WMBS jobID
        """
        self.data.jobID = jobID
        return

    def getJobID(self):
        """
        _getJobID_

        Get the WMBS job ID if attached
        """
        return getattr(self.data, 'jobID', None)

    def getAllFileRefs(self):
        """
        _getAllFileRefs_

        Get references for all files in all steps
        """
        fileRefs = []
        for step in self.listSteps():
            tmpRefs = self.getAllFileRefsFromStep(step=step)
            if len(tmpRefs) > 0:
                fileRefs.extend(tmpRefs)
        return fileRefs

    def setAcquisitionProcessing(self, acquisitionEra, processingVer, processingStr=None):
        """
        _setAcquisitionProcessing_

        Set the acquisition and processing era for every output file.
        ONLY run this after all files have been accumulated; it doesn't set
        things for future files.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.acquisitionEra = acquisitionEra
            f.processingVer = processingVer
            f.processingStr = processingStr
        return

    def setValidStatus(self, validStatus):
        """
        _setValidStatus_

        Set the validStatus for all steps and all files.
        ONLY run this after all files have been attached.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.validStatus = validStatus
        return

    def setGlobalTag(self, globalTag):
        """
        _setGlobalTag_

        Set the global tag from the spec on the WN.
        ONLY run this after all the files have been attached.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.globalTag = globalTag
        return

    def setCampaign(self, campaign):
        """
        _setCampaign_

        Set the campaign for the report
        """
        self.data.campaign = campaign
        return

    def getCampaign(self):
        """
        _getCampaign_

        Return the campaign
        """
        return getattr(self.data, 'campaign', "")

    def setPrepID(self, prep_id):
        """
        _setPrepID_

        Set the PrepID from the spec on the WN.
        ONLY run this after all the files have been attached.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.prep_id = prep_id
        self.data.prep_id = prep_id
        return

    def getPrepID(self):
        """
        _getPrepID_

        Return the PrepID
        """
        return getattr(self.data, 'prep_id', "")

    def setConfigURL(self, configURL):
        """
        _setConfigURL_

        Set the config URL in a portable storage form
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.configURL = configURL
        return

    def setInputDataset(self, inputPath):
        """
        _setInputDataset_

        Set the input dataset path for the task in each file
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.inputPath = inputPath
        return

    def setStepRSS(self, stepName, minimum, maximum, average):
        """
        _setStepRSS_

        Set the performance RSS information
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('RSSMemory')
        reportStep.performance.RSSMemory.min = minimum
        reportStep.performance.RSSMemory.max = maximum
        reportStep.performance.RSSMemory.average = average
        return

    def setStepPMEM(self, stepName, minimum, maximum, average):
        """
        _setStepPMEM_

        Set the performance PMEM information
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('PhysicalMemory')
        reportStep.performance.PhysicalMemory.min = minimum
        reportStep.performance.PhysicalMemory.max = maximum
        reportStep.performance.PhysicalMemory.average = average
        return

    def setStepPCPU(self, stepName, minimum, maximum, average):
        """
        _setStepPCPU_

        Set the performance PCPU information
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('PercentCPU')
        reportStep.performance.PercentCPU.min = minimum
        reportStep.performance.PercentCPU.max = maximum
        reportStep.performance.PercentCPU.average = average
        return

    def setStepVSize(self, stepName, minimum, maximum, average):
        """
        _setStepVSize_

        Set the performance VSize information
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('VSizeMemory')
        reportStep.performance.VSizeMemory.min = minimum
        reportStep.performance.VSizeMemory.max = maximum
        reportStep.performance.VSizeMemory.average = average
        return

    def setStepCounter(self, stepName, counter):
        """
        _setStepCounter_

        Assign a number to the step
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.counter = counter
        return

    def checkForAdlerChecksum(self, stepName):
        """
        _checkForAdlerChecksum_

        Some steps require that all output files have adler checksums.
        This will go through all output files in a step and make sure they
        have an adler32 checksum.  If they don't, it creates an error with
        code 60451 for the step, failing it.
        """
        error = None
        files = self.getAllFilesFromStep(step=stepName)
        for f in files:
            if 'adler32' not in f.get('checksums', {}):
                error = f.get('lfn', None)
            elif f['checksums']['adler32'] is None:
                error = f.get('lfn', None)

        if error:
            msg = '%s, file was %s' % (WM_JOB_ERROR_CODES[60451], error)
            self.addError(stepName, 60451, "NoAdler32Checksum", msg)
            self.setStepStatus(stepName=stepName, status=60451)
        return

    def checkForRunLumiInformation(self, stepName):
        """
        _checkForRunLumiInformation_

        Some steps require that all output files have run/lumi information.
        This will go through all output files in a step and make sure they
        have run/lumi information.  If they don't, it creates an error with
        code 70452 for the step, failing it.
        """
        error = None
        files = self.getAllFilesFromStep(step=stepName)
        for f in files:
            if not f.get('runs', None):
                error = f.get('lfn', None)
            else:
                for run in f['runs']:
                    lumis = run.lumis
                    if not lumis:
                        error = f.get('lfn', None)
                        break

        if error:
            msg = '%s, file was %s' % (WM_JOB_ERROR_CODES[70452], error)
            self.addError(stepName, 70452, "NoRunLumiInformation", msg)
            self.setStepStatus(stepName=stepName, status=70452)
        return

    def checkForOutputFiles(self, stepName):
        """
        _checkForOutputFiles_

        Verify that there is at least one output file, either from analysis
        or from an output module.
        """
        files = self.getAllFilesFromStep(step=stepName)
        analysisFiles = self.getAnalysisFilesFromStep(step=stepName)
        if len(files) == 0 and len(analysisFiles) == 0:
            msg = WM_JOB_ERROR_CODES[60450]
            msg += "\nList of skipped files is:\n"
            for skipF in self.getSkippedFilesFromStep(stepName=stepName):
                msg += "  %s\n" % skipF
            self.addError(stepName, 60450, "NoOutput", msg)
            self.setStepStatus(stepName=stepName, status=60450)
        return

    def stripInputFiles(self):
        """
        _stripInputFiles_

        If we need to compact the FWJR, the easiest way is just to trim the
        number of input files.
        """
        for stepName in self.listSteps():
            step = self.retrieveStep(stepName)
            inputSources = step.input.listSections_()
            for inputSource in inputSources:
                source = getattr(step.input, inputSource)
                for fileNum in range(source.files.fileCount):
                    delattr(source.files, "file%d" % fileNum)
                source.files.fileCount = 0
        return

    def getWorkerNodeInfo(self):
        wnInfo = {"HostName": getattr(self.data, 'hostName', ''),
                  "MachineFeatures": getattr(self.data, 'machineFeatures', {}),
                  "JobFeatures": getattr(self.data, 'jobFeatures', {})}
        return wnInfo

    def setLogURL(self, url):
        """
        Set the log URL for this job report, e.g.
        https://eoscmsweb.cern.ch/eos/cms/store/logs/prod/recent/
        """
        self.data.logURL = url

    def getLogURL(self):
        """
        _getLogURL_

        Return the log URL
        """
        return getattr(self.data, 'logURL', '')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__ProcessingSample__

Example of a report from a processing job with multiple output modules

Created on Fri Jun 8 11:27:53 2012

@author: dballest
"""

from WMCore.Configuration import ConfigSection
from WMCore.FwkJobReport.Report import Report

FrameworkJobReport = ConfigSection("FrameworkJobReport")
FrameworkJobReport.task = '/Run195529-SingleElectron-Run2012B-PromptReco-v1-SingleElectron/DataProcessing'
FrameworkJobReport.workload = 'Unknown'
FrameworkJobReport.section_('cmsRun1')
FrameworkJobReport.cmsRun1.status = 0
FrameworkJobReport.cmsRun1.counter = 1
FrameworkJobReport.cmsRun1.section_('cleanup')
FrameworkJobReport.cmsRun1.cleanup.section_('unremoved')
FrameworkJobReport.cmsRun1.cleanup.section_('removed')
FrameworkJobReport.cmsRun1.cleanup.removed.fileCount = 0
FrameworkJobReport.cmsRun1.section_('errors')
FrameworkJobReport.cmsRun1.section_('logs')
FrameworkJobReport.cmsRun1.section_('parameters')
FrameworkJobReport.cmsRun1.parameters.GeneratorInfo = ''
FrameworkJobReport.cmsRun1.parameters.ReadBranches = ''
FrameworkJobReport.cmsRun1.outputModules = ['SKIMStreamDiTau', 'SKIMStreamHighMET', 'SKIMStreamLogError',
                                            'SKIMStreamLogErrorMonitor', 'SKIMStreamTOPElePlusJets',
                                            'SKIMStreamWElectron']
FrameworkJobReport.cmsRun1.stopTime = 1339094634.7
FrameworkJobReport.cmsRun1.section_('site')
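# A hedged sketch (not part of the sample) showing how such a sample step can
# be attached to a Report instance and inspected; setStep() and retrieveStep()
# are defined in WMCore.FwkJobReport.Report.
rep = Report()
rep.setStep("cmsRun1", FrameworkJobReport.cmsRun1)
for moduleName in rep.retrieveStep("cmsRun1").outputModules:
    print(moduleName)                  # SKIMStreamDiTau, SKIMStreamHighMET, ...
print(rep.stepSuccessful("cmsRun1"))   # True, since status == 0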
<proxy url="http://cmsfrontier2.fnal.gov:3128"/> </frontier-connect> </calib-data> </site> </site-local-config> """ siteLocalConfig = os.path.join(self.testDir, "test-site-local-config.xml") f = open(siteLocalConfig, 'w') f.write(siteLocalConfigContent) f.close() from WMCore.WMRuntime.Scripts.SetupCMSSWPset import SetupCMSSWPset setupScript = SetupCMSSWPset() setupScript.step = self.createTestStep() setupScript.stepSpace = ConfigSection(name="stepSpace") setupScript.stepSpace.location = os.path.join(self.testDir, "cmsRun1") setupScript.job = self.createTestJob() # define pileup configuration # despite of the implementation considering whichever type of pileup, # only "data" and "mc" types are eventually considered and lead to any # modifications of job input files pileupConfig = { "data": [ "/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO" ], "mc": [ "/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO" ] } dbsUrl = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__FallbackSample__

Example of a report from a job that had xrootd fallback reads
"""

from WMCore.Configuration import ConfigSection
from WMCore.FwkJobReport.Report import Report

FrameworkJobReport = ConfigSection("FrameworkJobReport")
FrameworkJobReport.task = '/Run195530-PhotonHad-Run2012B-PromptReco-v1-PhotonHad/DataProcessing'
FrameworkJobReport.workload = 'Unknown'
FrameworkJobReport.section_('cmsRun1')
FrameworkJobReport.cmsRun1.status = 1
FrameworkJobReport.cmsRun1.section_('cleanup')
FrameworkJobReport.cmsRun1.cleanup.section_('unremoved')
FrameworkJobReport.cmsRun1.cleanup.section_('removed')
FrameworkJobReport.cmsRun1.cleanup.removed.fileCount = 0
FrameworkJobReport.cmsRun1.section_('errors')
FrameworkJobReport.cmsRun1.section_('logs')
FrameworkJobReport.cmsRun1.section_('parameters')
FrameworkJobReport.cmsRun1.parameters.ReadBranches = ''
FrameworkJobReport.cmsRun1.outputModules = []
FrameworkJobReport.cmsRun1.section_('site')
FrameworkJobReport.cmsRun1.section_('analysis')
FrameworkJobReport.cmsRun1.analysis.section_('files')
FrameworkJobReport.cmsRun1.analysis.files.fileCount = 0
FrameworkJobReport.cmsRun1.section_('performance')
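# A minimal sketch (not part of the sample) of how fallback reads are recorded
# and read back with the Report API shown elsewhere in this module; the LFN
# and PFN values here are made up for illustration.
rep = Report("cmsRun1")
rep.addFallbackFile(lfn="/store/data/Run2012B/PhotonHad/RECO/fallback.root",
                    pfn="root://fallback.example.invalid//store/data/Run2012B/PhotonHad/RECO/fallback.root")
print(rep.getFallbackFilesFromStep("cmsRun1"))  # ['/store/data/.../fallback.root']
print(rep.getAllFallbackFiles())                # same list, gathered over all steps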
class Report:
    """
    The base class for the new jobReport.
    """

    def __init__(self, reportname = None):
        self.data = ConfigSection("FrameworkJobReport")
        self.data.steps = []
        self.data.workload = "Unknown"

        if reportname:
            self.addStep(reportname = reportname)

        return

    def __str__(self):
        return str(self.data)

    def listSteps(self):
        """
        _listSteps_

        List the names of all the steps in the report.
        """
        return self.data.steps

    def setStepStatus(self, stepName, status):
        """
        _setStepStatus_

        Set the status for a step.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.status = status
        return

    def parse(self, xmlfile, stepName = "cmsRun1"):
        """
        _parse_

        Read in the FrameworkJobReport XML file produced by cmsRun and
        pull the information from it into this object.
        """
        from WMCore.FwkJobReport.XMLParser import xmlToJobReport
        try:
            xmlToJobReport(self, xmlfile)
        except Exception as ex:
            msg = "Error reading XML job report file, possibly corrupt XML File:\n"
            msg += "Details: %s" % str(ex)
            crashMessage = "\nStacktrace:\n"
            stackTrace = traceback.format_tb(sys.exc_info()[2], None)
            for stackFrame in stackTrace:
                crashMessage += stackFrame
            msg += crashMessage
            self.addError(stepName, 50115, "BadFWJRXML", msg)
            raise FwkJobReportException(msg)

    def jsonizeFiles(self, reportModule):
        """
        _jsonizeFiles_

        Put individual files in JSON format.
        """
        jsonFiles = []
        files = getattr(reportModule, "files", None)
        if not files:
            return jsonFiles

        fileCount = getattr(reportModule.files, "fileCount", 0)

        for i in range(fileCount):
            reportFile = getattr(reportModule.files, "file%s" % i)
            jsonFile = reportFile.dictionary_()

            if jsonFile.get('runs', None):
                cfgSectionRuns = jsonFile["runs"]
                jsonFile["runs"] = {}
                for runNumber in cfgSectionRuns.listSections_():
                    jsonFile["runs"][str(runNumber)] = getattr(cfgSectionRuns, runNumber)

            jsonFiles.append(jsonFile)

        return jsonFiles

    def jsonizePerformance(self, perfSection):
        """
        _jsonizePerformance_

        Convert the performance section of the FWJR into JSON.
        """
        jsonPerformance = {}
        for reportSection in ["storage", "memory", "cpu", "multicore"]:
            jsonPerformance[reportSection] = {}
            if not hasattr(perfSection, reportSection):
                continue

            jsonPerformance[reportSection] = getattr(perfSection, reportSection).dictionary_()
            for key in jsonPerformance[reportSection].keys():
                val = jsonPerformance[reportSection][key]
                if isinstance(val, float):
                    if math.isinf(val) or math.isnan(val):
                        jsonPerformance[reportSection][key] = None

        return jsonPerformance

    def __to_json__(self, thunker):
        """
        __to_json__

        Create a JSON version of the Report.
        """
        jsonReport = {}
        jsonReport["task"] = self.getTaskName()
        jsonReport["steps"] = {}
        jsonReport["skippedFiles"] = self.getAllSkippedFiles()
        jsonReport["fallbackFiles"] = self.getAllFallbackFiles()

        for stepName in self.listSteps():
            reportStep = self.retrieveStep(stepName)
            jsonStep = {}
            jsonStep["status"] = reportStep.status

            stepTimes = self.getTimes(stepName)

            if stepTimes["startTime"] is not None:
                stepTimes["startTime"] = int(stepTimes["startTime"])
            if stepTimes["stopTime"] is not None:
                stepTimes["stopTime"] = int(stepTimes["stopTime"])

            jsonStep["start"] = stepTimes["startTime"]
            jsonStep["stop"] = stepTimes["stopTime"]

            jsonStep["performance"] = self.jsonizePerformance(reportStep.performance)

            jsonStep["output"] = {}
            for outputModule in reportStep.outputModules:
                reportOutputModule = getattr(reportStep.output, outputModule)
                jsonStep["output"][outputModule] = self.jsonizeFiles(reportOutputModule)

            analysisSection = getattr(reportStep, 'analysis', None)
            if analysisSection:
                jsonStep["output"]['analysis'] = self.jsonizeFiles(analysisSection)

            jsonStep["input"] = {}
            for inputSource in reportStep.input.listSections_():
                reportInputSource = getattr(reportStep.input, inputSource)
                jsonStep["input"][inputSource] = self.jsonizeFiles(reportInputSource)

            jsonStep["errors"] = []
            errorCount = getattr(reportStep.errors, "errorCount", 0)
            for i in range(errorCount):
                reportError = getattr(reportStep.errors, "error%i" % i)
                jsonStep["errors"].append({"type": reportError.type,
                                           "details": reportError.details,
                                           "exitCode": reportError.exitCode})

            jsonStep["cleanup"] = {}
            jsonStep["parameters"] = {}
            jsonStep["site"] = self.getSiteName()
            jsonStep["analysis"] = {}
            jsonStep["logs"] = {}
            jsonReport["steps"][stepName] = jsonStep

        return jsonReport

    def getSiteName(self):
        """
        _getSiteName_

        Return the site name attribute (not step specific).
        """
        return getattr(self.data, 'siteName', {})

    def getExitCodes(self):
        """
        _getExitCodes_

        Return a set of all non-zero exit codes in the report.
        """
        returnCodes = set()

        for stepName in self.listSteps():
            returnCodes.update(self.getStepExitCodes(stepName = stepName))

        return returnCodes

    def getStepExitCodes(self, stepName):
        """
        _getStepExitCodes_

        Return a set of all non-zero exit codes in the step.
        """
        returnCodes = set()
        reportStep = self.retrieveStep(stepName)
        errorCount = getattr(reportStep.errors, "errorCount", 0)
        for i in range(errorCount):
            reportError = getattr(reportStep.errors, "error%i" % i)
            if getattr(reportError, 'exitCode', None):
                returnCodes.add(int(reportError.exitCode))

        return returnCodes

    def getExitCode(self):
        """
        _getExitCode_

        Return the first exit code you find.
        """
        returnCode = 0
        for stepName in self.listSteps():
            errorCode = self.getStepExitCode(stepName = stepName)
            if errorCode == 99999:
                # Then we don't know what this error was.
                # Mark it for return only if we don't find an
                # actual error code in the job.
                returnCode = errorCode
            elif errorCode != 0:
                return errorCode

        return returnCode

    def getStepExitCode(self, stepName):
        """
        _getStepExitCode_

        Get the exit code for a particular step.
        Return 0 if none.
        """
        returnCode = 0
        reportStep = self.retrieveStep(stepName)
        errorCount = getattr(reportStep.errors, "errorCount", 0)
        for i in range(errorCount):
            reportError = getattr(reportStep.errors, "error%i" % i)
            if not getattr(reportError, 'exitCode', None):
                returnCode = 99999
            else:
                return int(reportError.exitCode)

        return returnCode

    def persist(self, filename):
        """
        _persist_

        Pickle this object and save it to disk.
        """
        handle = open(filename, 'w')
        cPickle.dump(self.data, handle)
        handle.close()
        return

    def unpersist(self, filename, reportname = None):
        """
        _unpersist_

        Load a pickled FWJR from disk.
        """
        handle = open(filename, 'r')
        self.data = cPickle.load(handle)
        handle.close()

        # old self.report (if it existed) became unattached
        if reportname:
            self.report = getattr(self.data, reportname)

        return

    def addOutputModule(self, moduleName):
        """
        _addOutputModule_

        Add an entry for an output module.
        """
        self.report.outputModules.append(moduleName)
        self.report.output.section_(moduleName)

        outMod = getattr(self.report.output, moduleName)
        outMod.section_("files")
        outMod.section_("dataset")
        outMod.files.fileCount = 0

        return outMod

    def killOutput(self):
        """
        _killOutput_

        Remove all the output from the report. This is useful for
        chained processing where we don't want to keep the output from
        a particular step in a job.
        """
        for outputModuleName in self.report.outputModules:
            delattr(self.report.output, outputModuleName)

        self.report.outputModules = []
        return

    def addOutputFile(self, outputModule, file = {}):
        """
        _addOutputFile_

        Add an output file to the outputModule provided.
        """
        if not checkFileForCompletion(file):
            # Then the file is not complete, and should not be added
            logging.error("Attempted to add an incomplete file to output module %s" % outputModule)
            return None

        # Now load the output module and create the file object
        outMod = getattr(self.report.output, outputModule, None)
        if outMod is None:
            outMod = self.addOutputModule(outputModule)
        count = outMod.files.fileCount
        fileSection = "file%s" % count
        outMod.files.section_(fileSection)
        fileRef = getattr(outMod.files, fileSection)
        outMod.files.fileCount += 1

        # Now we need to eliminate the optional and non-primitives:
        # runs, parents, branches, locations and datasets
        keyList = file.keys()

        fileRef.section_("runs")
        if "runs" in file:
            for run in file["runs"]:
                addRunInfoToFile(fileRef, run)
            keyList.remove('runs')

        if "parents" in file:
            setattr(fileRef, 'parents', list(file['parents']))
            keyList.remove('parents')

        if "locations" in file:
            fileRef.location = list(file["locations"])
            keyList.remove('locations')
        elif "SEName" in file:
            fileRef.location = [file["SEName"]]

        if "LFN" in file:
            fileRef.lfn = file["LFN"]
            keyList.remove("LFN")
        if "PFN" in file:
            fileRef.pfn = file["PFN"]
            keyList.remove("PFN")

        # All right, the rest should be JSONalizable python primitives
        for entry in keyList:
            setattr(fileRef, entry, file[entry])

        # And we're done
        return fileRef

    def addInputSource(self, sourceName):
        """
        _addInputSource_

        Add an input source to the report, doing nothing if the input
        source already exists.
        """
        if hasattr(self.report.input, sourceName):
            return getattr(self.report.input, sourceName)

        self.report.input.section_(sourceName)
        srcMod = getattr(self.report.input, sourceName)
        srcMod.section_("files")
        srcMod.files.fileCount = 0

        return srcMod

    def addInputFile(self, sourceName, **attrs):
        """
        _addInputFile_

        Add an input file to the given source.
        """
        srcMod = getattr(self.report.input, sourceName, None)
        if srcMod is None:
            srcMod = self.addInputSource(sourceName)
        count = srcMod.files.fileCount
        fileSection = "file%s" % count
        srcMod.files.section_(fileSection)
        fileRef = getattr(srcMod.files, fileSection)
        srcMod.files.fileCount += 1

        keyList = attrs.keys()

        fileRef.section_("runs")
        if "runs" in attrs:
            for run in attrs["runs"]:
                addRunInfoToFile(fileRef, run)
            keyList.remove('runs')

        if "parents" in attrs:
            keyList.remove('parents')
        if "locations" in attrs:
            keyList.remove('locations')

        # All right, the rest should be JSONalizable python primitives
        for entry in keyList:
            setattr(fileRef, entry, attrs[entry])

        return fileRef

    def addAnalysisFile(self, filename, **attrs):
        """
        _addAnalysisFile_

        Add an Analysis File.
        """
        analysisFiles = self.report.analysis.files
        count = analysisFiles.fileCount
        label = "file%s" % count

        analysisFiles.section_(label)
        newFile = getattr(analysisFiles, label)
        newFile.fileName = filename

        for x, y in attrs.items():
            setattr(newFile, x, y)

        analysisFiles.fileCount += 1
        return

    def addRemovedCleanupFile(self, **attrs):
        """
        _addRemovedCleanupFile_

        Add a file to the cleanup.removed section.
        """
        removedFiles = self.report.cleanup.removed
        count = self.report.cleanup.removed.fileCount
        label = 'file%s' % count

        removedFiles.section_(label)
        newFile = getattr(removedFiles, label)

        for x, y in attrs.items():
            setattr(newFile, x, y)

        self.report.cleanup.removed.fileCount += 1
        return

    def addError(self, stepName, exitCode, errorType, errorDetails):
        """
        _addError_

        Add an error report with an exitCode, type/class of error and
        details of the error as a string.
        """
        if self.retrieveStep(stepName) is None:
            # Create a step and set it to failed
            # Assumption: Adding an error fails a step
            self.addStep(stepName, status = 1)

        stepSection = self.retrieveStep(stepName)

        errorCount = getattr(stepSection.errors, "errorCount", 0)
        errEntry = "error%s" % errorCount
        stepSection.errors.section_(errEntry)
        errDetails = getattr(stepSection.errors, errEntry)
        errDetails.exitCode = exitCode
        errDetails.type = str(errorType)
        errDetails.details = errorDetails

        setattr(stepSection.errors, "errorCount", errorCount + 1)
        return

    def addSkippedFile(self, lfn, pfn):
        """
        _addSkippedFile_

        Report a skipped input file.
        """
        count = self.report.skipped.files.fileCount
        entry = "file%s" % count
        self.report.skipped.files.section_(entry)
        skipSect = getattr(self.report.skipped.files, entry)
        skipSect.PhysicalFileName = pfn
        skipSect.LogicalFileName = lfn
        self.report.skipped.files.fileCount += 1
        return

    def addFallbackFile(self, lfn, pfn):
        """
        _addFallbackFile_

        Report a fallback attempt for an input file.
        """
        count = self.report.fallback.files.fileCount
        entry = "file%s" % count
        self.report.fallback.files.section_(entry)
        fallbackSect = getattr(self.report.fallback.files, entry)
        fallbackSect.PhysicalFileName = pfn
        fallbackSect.LogicalFileName = lfn
        self.report.fallback.files.fileCount += 1
        return

    def addSkippedEvent(self, run, event):
        """
        _addSkippedEvent_

        Add a skipped event.
        """
        self.report.skipped.events.section_(str(run))
        runsect = getattr(self.report.skipped.events, str(run))
        if not hasattr(runsect, "eventList"):
            runsect.eventList = []
        runsect.eventList.append(event)
        return

    def addStep(self, reportname, status = 1):
        """
        _addStep_

        This creates a report section into self.report
        """
        if hasattr(self.data, reportname):
            msg = "Attempted to create pre-existing report section %s" % reportname
            logging.error(msg)
            return

        self.data.steps.append(reportname)

        self.reportname = reportname
        self.data.section_(reportname)
        self.report = getattr(self.data, reportname)
        self.report.id = None
        self.report.status = status
        self.report.outputModules = []

        # structure
        self.report.section_("site")
        self.report.section_("output")
        self.report.section_("input")
        self.report.section_("performance")
        self.report.section_("analysis")
        self.report.section_("errors")
        self.report.section_("skipped")
        self.report.section_("fallback")
        self.report.section_("parameters")
        self.report.section_("logs")
        self.report.section_("cleanup")
        self.report.analysis.section_("files")
        self.report.cleanup.section_("removed")
        self.report.cleanup.section_("unremoved")
        self.report.skipped.section_("events")
        self.report.skipped.section_("files")
        self.report.fallback.section_("files")
        self.report.skipped.files.fileCount = 0
        self.report.fallback.files.fileCount = 0
        self.report.analysis.files.fileCount = 0
        self.report.cleanup.removed.fileCount = 0

        return

    def setStep(self, stepName, stepSection):
        """
        _setStep_
        """
        if stepName not in self.data.steps:
            self.data.steps.append(stepName)
        else:
            logging.info("Step %s is now being overridden by a new step report" % stepName)
        self.data.section_(stepName)
        setattr(self.data, stepName, stepSection)
        return

    def retrieveStep(self, step):
        """
        _retrieveStep_

        Grabs a report in the raw and returns it.
        """
        reportSection = getattr(self.data, step, None)
        return reportSection

    def load(self, filename):
        """
        _load_

        This just maps to unpersist.
        """
        self.unpersist(filename)
        return

    def save(self, filename):
        """
        _save_

        This just maps to persist.
        """
        self.persist(filename)
        return

    def getOutputModule(self, step, outputModule):
        """
        _getOutputModule_

        Get the output module from a particular step.
        """
        stepReport = self.retrieveStep(step = step)
        if not stepReport:
            return None
        return getattr(stepReport.output, outputModule, None)

    def getOutputFile(self, fileName, outputModule, step):
        """
        _getOutputFile_

        Takes a fileRef object and returns a DataStructs/File object
        as output.
        """
        outputMod = self.getOutputModule(step = step, outputModule = outputModule)
        if not outputMod:
            return None

        fileRef = getattr(outputMod.files, fileName, None)
        newFile = File(locations = set())

        # Locations
        newFile.setLocation(getattr(fileRef, "location", None))

        # Runs
        runList = fileRef.runs.listSections_()
        for run in runList:
            lumis = getattr(fileRef.runs, run)
            newRun = Run(int(run), *lumis)
            newFile.addRun(newRun)

        newFile["lfn"] = getattr(fileRef, "lfn", None)
        newFile["pfn"] = getattr(fileRef, "pfn", None)
        newFile["events"] = int(getattr(fileRef, "events", 0))
        newFile["size"] = int(getattr(fileRef, "size", 0))
        newFile["branches"] = getattr(fileRef, "branches", [])
        newFile["input"] = getattr(fileRef, "input", [])
        newFile["inputpfns"] = getattr(fileRef, "inputpfns", [])
        newFile["branch_hash"] = getattr(fileRef, "branch_hash", None)
        newFile["catalog"] = getattr(fileRef, "catalog", "")
        newFile["guid"] = getattr(fileRef, "guid", "")
        newFile["module_label"] = getattr(fileRef, "module_label", "")
        newFile["checksums"] = getattr(fileRef, "checksums", {})
        newFile["merged"] = bool(getattr(fileRef, "merged", False))
        newFile["dataset"] = getattr(fileRef, "dataset", {})
        newFile["acquisitionEra"] = getattr(fileRef, 'acquisitionEra', None)
        newFile["processingVer"] = getattr(fileRef, 'processingVer', None)
        newFile["validStatus"] = getattr(fileRef, 'validStatus', None)
        newFile["globalTag"] = getattr(fileRef, 'globalTag', None)
        newFile["prep_id"] = getattr(fileRef, 'prep_id', None)
        newFile['configURL'] = getattr(fileRef, 'configURL', None)
        newFile['inputPath'] = getattr(fileRef, 'inputPath', None)
        newFile["outputModule"] = outputModule
        newFile["fileRef"] = fileRef

        return newFile

    def getAllFilesFromStep(self, step):
        """
        _getAllFilesFromStep_

        For a given step, retrieve all the associated files.
        """
        stepReport = self.retrieveStep(step = step)
        if not stepReport:
            logging.debug("Asked to retrieve files from non-existent step %s" % step)
            return []

        listOfModules = getattr(stepReport, 'outputModules', None)
        if not listOfModules:
            logging.debug("Asked to retrieve files from step %s with no outputModules" % step)
            logging.debug("StepReport: %s" % stepReport)
            return []

        listOfFiles = []

        for module in listOfModules:
            tmpList = self.getFilesFromOutputModule(step = step, outputModule = module)
            if not tmpList:
                continue
            listOfFiles.extend(tmpList)

        return listOfFiles

    def getAllFiles(self):
        """
        _getAllFiles_

        Grabs all files in all output modules in all steps.
        """
        listOfFiles = []

        for step in self.data.steps:
            tmp = self.getAllFilesFromStep(step = step)
            if tmp:
                listOfFiles.extend(tmp)

        return listOfFiles

    def getAllInputFiles(self):
        """
        _getAllInputFiles_

        Gets all the input files.
        """
        listOfFiles = []
        for step in self.data.steps:
            tmp = self.getInputFilesFromStep(stepName = step)
            if tmp:
                listOfFiles.extend(tmp)

        return listOfFiles

    def getInputFilesFromStep(self, stepName, inputSource = None):
        """
        _getInputFilesFromStep_

        Retrieve a list of input files from the given step.
        """
        step = self.retrieveStep(stepName)

        inputSources = []
        if inputSource is None:
            inputSources = step.input.listSections_()
        else:
            inputSources = [inputSource]

        inputFiles = []
        for inputSource in inputSources:
            source = getattr(step.input, inputSource)
            for fileNum in range(source.files.fileCount):
                fwjrFile = getattr(source.files, "file%d" % fileNum)

                lfn = getattr(fwjrFile, "lfn", None)
                pfn = getattr(fwjrFile, "pfn", None)
                size = getattr(fwjrFile, "size", 0)
                events = getattr(fwjrFile, "events", 0)
                branches = getattr(fwjrFile, "branches", [])
                catalog = getattr(fwjrFile, "catalog", None)
                guid = getattr(fwjrFile, "guid", None)
                inputSourceClass = getattr(fwjrFile, "input_source_class", None)
                moduleLabel = getattr(fwjrFile, "module_label", None)
                inputType = getattr(fwjrFile, "input_type", None)

                inputFile = File(lfn = lfn, size = size, events = events)
                inputFile["pfn"] = pfn
                inputFile["branches"] = branches
                inputFile["catalog"] = catalog
                inputFile["guid"] = guid
                inputFile["input_source_class"] = inputSourceClass
                inputFile["module_label"] = moduleLabel
                inputFile["input_type"] = inputType

                runSection = getattr(fwjrFile, "runs")
                runNumbers = runSection.listSections_()
                for runNumber in runNumbers:
                    lumiTuple = getattr(runSection, str(runNumber))
                    inputFile.addRun(Run(int(runNumber), *lumiTuple))

                inputFiles.append(inputFile)

        return inputFiles

    def getFilesFromOutputModule(self, step, outputModule):
        """
        _getFilesFromOutputModule_

        Grab all the files in a particular output module.
        """
        listOfFiles = []
        outputMod = self.getOutputModule(step = step, outputModule = outputModule)
        if not outputMod:
            return None

        for n in range(outputMod.files.fileCount):
            outputFile = self.getOutputFile(fileName = 'file%i' % n,
                                            outputModule = outputModule,
                                            step = step)
            if not outputFile:
                msg = "Could not find file%i in module" % n
                logging.error(msg)
                return None

            # Now, append to the list of files
            listOfFiles.append(outputFile)

        return listOfFiles

    def getAllSkippedFiles(self):
        """
        _getAllSkippedFiles_

        Get a list of LFNs for all the input files listed as skipped
        on the report.
        """
        listOfFiles = []
        for step in self.data.steps:
            tmp = self.getSkippedFilesFromStep(stepName = step)
            if tmp:
                listOfFiles.extend(tmp)

        return listOfFiles

    def getAllFallbackFiles(self):
        """
        _getAllFallbackFiles_

        Get a list of LFNs for all the input files listed as fallback
        attempts on the report.
        """
        listOfFiles = []
        for step in self.data.steps:
            tmp = self.getFallbackFilesFromStep(stepName = step)
            if tmp:
                listOfFiles.extend(tmp)

        return listOfFiles

    def getSkippedFilesFromStep(self, stepName):
        """
        _getSkippedFilesFromStep_

        Get a list of LFNs skipped in the given step.
        """
        skippedFiles = []
        step = self.retrieveStep(stepName)

        filesSection = step.skipped.files
        fileCount = getattr(filesSection, "fileCount", 0)

        for fileNum in range(fileCount):
            fileSection = getattr(filesSection, "file%d" % fileNum)
            lfn = getattr(fileSection, "LogicalFileName", None)
            if lfn is not None:
                skippedFiles.append(lfn)
            else:
                logging.error("Found no LFN in file %s" % str(fileSection))

        return skippedFiles

    def getFallbackFilesFromStep(self, stepName):
        """
        _getFallbackFilesFromStep_

        Get a list of LFNs which triggered a fallback in the given step.
        """
        fallbackFiles = []
        step = self.retrieveStep(stepName)

        try:
            filesSection = step.fallback.files
        except AttributeError:
            return fallbackFiles

        fileCount = getattr(filesSection, "fileCount", 0)

        for fileNum in range(fileCount):
            fileSection = getattr(filesSection, "file%d" % fileNum)
            lfn = getattr(fileSection, "LogicalFileName", None)
            if lfn is not None:
                fallbackFiles.append(lfn)
            else:
                logging.error("Found no LFN in file %s" % str(fileSection))

        return fallbackFiles

    def getStepErrors(self, stepName):
        """
        _getStepErrors_

        Get all errors for a given step.
        """
        if self.retrieveStep(stepName) is None:
            # Create a step and set it to failed
            # Assumption: Adding an error fails a step
            self.addStep(stepName, status = 1)

        stepSection = self.retrieveStep(stepName)

        errorCount = getattr(stepSection.errors, "errorCount", 0)
        if errorCount == 0:
            return {}
        else:
            return stepSection.errors.dictionary_()

    def stepSuccessful(self, stepName):
        """
        _stepSuccessful_

        Determine whether or not a step was successful.
        """
        stepReport = self.retrieveStep(step = stepName)
        status = getattr(stepReport, 'status', 1)
        # We have too many possibilities
        if status not in [0, '0', 'success', 'Success']:
            return False

        return True

    def taskSuccessful(self, ignoreString = 'logArch'):
        """
        _taskSuccessful_

        Return True if all steps successful, False otherwise.
        """
        value = True

        if len(self.data.steps) == 0:
            # Mark jobs as failed if they have no steps
            msg = "Could not find any steps"
            logging.error(msg)
            return False

        for stepName in self.data.steps:
            # Ignore specified steps
            # i.e., logArch steps can fail without causing
            # the task to fail
            if ignoreString and re.search(ignoreString, stepName):
                continue
            if not self.stepSuccessful(stepName = stepName):
                value = False

        return value

    def getAnalysisFilesFromStep(self, step):
        """
        _getAnalysisFilesFromStep_

        Retrieve a list of all the analysis files produced in a step.
        """
        stepReport = self.retrieveStep(step = step)

        if not stepReport or not hasattr(stepReport.analysis, 'files'):
            return []

        analysisFiles = stepReport.analysis.files
        results = []
        for fileNum in range(analysisFiles.fileCount):
            results.append(getattr(analysisFiles, "file%s" % fileNum))

        # filter out duplicates
        duplicateCheck = []
        filteredResults = []
        for result in results:
            inputtag = getattr(result, 'inputtag', None)
            if (result.fileName, inputtag) not in duplicateCheck:
                duplicateCheck.append((result.fileName, inputtag))
                filteredResults.append(result)

        return filteredResults

    def getAllFileRefsFromStep(self, step):
        """
        _getAllFileRefsFromStep_

        Retrieve a list of all files produced in a step. The files will
        be in the form of references to the ConfigSection objects in the
        actual report.
        """
        stepReport = self.retrieveStep(step = step)
        if not stepReport:
            return []

        outputModules = getattr(stepReport, "outputModules", [])
        fileRefs = []
        for outputModule in outputModules:
            outputModuleRef = self.getOutputModule(step = step, outputModule = outputModule)

            for i in range(outputModuleRef.files.fileCount):
                fileRefs.append(getattr(outputModuleRef.files, "file%i" % i))

        analysisFiles = self.getAnalysisFilesFromStep(step)
        fileRefs.extend(analysisFiles)

        return fileRefs

    def addInfoToOutputFilesForStep(self, stepName, step):
        """
        _addInfoToOutputFilesForStep_

        Add the information missing from output files to the files.
        This requires the WMStep to be passed in.
        """
        stepReport = self.retrieveStep(step = stepName)
        fileInfo = FileInfo()
        if not stepReport:
            return None

        listOfModules = getattr(stepReport, 'outputModules', [])
        for module in listOfModules:
            outputMod = getattr(stepReport.output, module, None)
            for n in range(outputMod.files.fileCount):
                aFile = getattr(outputMod.files, 'file%i' % n, None)
                if not aFile:
                    msg = "Could not find file%i in module" % n
                    logging.error(msg)
                    return None

                fileInfo(fileReport = aFile, step = step, outputModule = module)

        return

    def deleteOutputModuleForStep(self, stepName, moduleName):
        """
        _deleteOutputModuleForStep_

        Delete any reference to the given output module in the step report,
        which includes deleting any output file it produced.
        """
        stepReport = self.retrieveStep(step = stepName)
        if not stepReport:
            return

        listOfModules = getattr(stepReport, 'outputModules', [])
        if moduleName not in listOfModules:
            return

        delattr(stepReport.output, moduleName)
        listOfModules.remove(moduleName)
        return

    def setStepStartTime(self, stepName):
        """
        _setStepStartTime_

        Set the startTime for a step.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.startTime = time.time()
        return

    def setStepStopTime(self, stepName):
        """
        _setStepStopTime_

        Set the stopTime for a step.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.stopTime = time.time()
        return

    def getTimes(self, stepName):
        """
        _getTimes_

        Return a dictionary with the start and stop times.
        """
        reportStep = self.retrieveStep(stepName)
        startTime = getattr(reportStep, 'startTime', None)
        stopTime = getattr(reportStep, 'stopTime', None)
        return {'startTime': startTime, 'stopTime': stopTime}

    def getFirstStartLastStop(self):
        """
        _getFirstStartLastStop_

        Get the first startTime and the last stopTime.
        """
        steps = self.listSteps()
        if len(steps) < 1:
            return None

        firstStep = self.getTimes(stepName = steps[0])
        startTime = firstStep['startTime']
        stopTime = firstStep['stopTime']
        for stepName in steps:
            timeStamps = self.getTimes(stepName = stepName)
            if timeStamps['startTime'] is None or timeStamps['stopTime'] is None:
                # Unusable times
                continue
            if startTime is None or startTime > timeStamps['startTime']:
                startTime = timeStamps['startTime']
            if stopTime is None or stopTime < timeStamps['stopTime']:
                stopTime = timeStamps['stopTime']

        return {'startTime': startTime, 'stopTime': stopTime}

    def setTaskName(self, taskName):
        """
        _setTaskName_

        Set the task name for the report.
        """
        self.data.task = taskName
        return

    def getTaskName(self):
        """
        _getTaskName_

        Return the task name.
        """
        return getattr(self.data, 'task', None)

    def setJobID(self, jobID):
        """
        _setJobID_

        Set the WMBS jobID.
        """
        self.data.jobID = jobID
        return

    def getJobID(self):
        """
        _getJobID_

        Get the WMBS job ID if attached.
        """
        return getattr(self.data, 'jobID', None)

    def getAllFileRefs(self):
        """
        _getAllFileRefs_

        Get references for all files in all steps.
        """
        fileRefs = []
        for step in self.data.steps:
            tmpRefs = self.getAllFileRefsFromStep(step = step)
            if len(tmpRefs) > 0:
                fileRefs.extend(tmpRefs)

        return fileRefs

    def setAcquisitionProcessing(self, acquisitionEra, processingVer, processingStr = None):
        """
        _setAcquisitionProcessing_

        Set the acquisition and processing era for every output file.
        ONLY run this after all files have been accumulated; it doesn't
        set things for future files.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.acquisitionEra = acquisitionEra
            f.processingVer = processingVer
            f.processingStr = processingStr

        return

    def setValidStatus(self, validStatus):
        """
        _setValidStatus_

        Set the validStatus for all steps and all files.
        ONLY run this after all files have been attached.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.validStatus = validStatus

        return

    def setGlobalTag(self, globalTag):
        """
        _setGlobalTag_

        Set the global tag from the spec on the WN.
        ONLY run this after all the files have been attached.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.globalTag = globalTag

        return

    def setPrepID(self, prep_id):
        """
        _setPrepID_

        Set the PrepID in all output files.
        ONLY run this after all the files have been attached.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.prep_id = prep_id

        return

    def setConfigURL(self, configURL):
        """
        _setConfigURL_

        Set the config URL in a portable storage form.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.configURL = configURL

        return

    def setInputDataset(self, inputPath):
        """
        _setInputDataset_

        Set the input dataset path for the task in each file.
        """
        fileRefs = self.getAllFileRefs()

        # Should now have all the fileRefs
        for f in fileRefs:
            f.inputPath = inputPath

        return

    def setStepRSS(self, stepName, minimum, maximum, average):
        """
        _setStepRSS_

        Set the Performance RSS information.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('RSSMemory')
        reportStep.performance.RSSMemory.min = minimum
        reportStep.performance.RSSMemory.max = maximum
        reportStep.performance.RSSMemory.average = average
        return

    def setStepPMEM(self, stepName, minimum, maximum, average):
        """
        _setStepPMEM_

        Set the Performance PMEM information.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('PhysicalMemory')
        reportStep.performance.PhysicalMemory.min = minimum
        reportStep.performance.PhysicalMemory.max = maximum
        reportStep.performance.PhysicalMemory.average = average
        return

    def setStepPCPU(self, stepName, minimum, maximum, average):
        """
        _setStepPCPU_

        Set the Performance PCPU information.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('PercentCPU')
        reportStep.performance.PercentCPU.min = minimum
        reportStep.performance.PercentCPU.max = maximum
        reportStep.performance.PercentCPU.average = average
        return

    def setStepVSize(self, stepName, minimum, maximum, average):
        """
        _setStepVSize_

        Set the Performance VSize information.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.performance.section_('VSizeMemory')
        reportStep.performance.VSizeMemory.min = minimum
        reportStep.performance.VSizeMemory.max = maximum
        reportStep.performance.VSizeMemory.average = average
        return

    def setStepCounter(self, stepName, counter):
        """
        _setStepCounter_

        Assign a number to the step.
        """
        reportStep = self.retrieveStep(stepName)
        reportStep.counter = counter
        return

    def checkForAdlerChecksum(self, stepName):
        """
        _checkForAdlerChecksum_

        Some steps require that all output files have adler checksums.
        This will go through all output files in a step and make sure they
        have an adler32 checksum. If they don't, it creates an error with
        code 60451 for the step, failing it.
        """
        error = None
        files = self.getAllFilesFromStep(step = stepName)
        for f in files:
            if 'adler32' not in f.get('checksums', {}):
                error = f.get('lfn', None)
            elif f['checksums']['adler32'] is None:
                error = f.get('lfn', None)

        if error:
            msg = '%s, file was %s' % (WMJobErrorCodes[60451], error)
            self.addError(stepName, 60451, "NoAdler32Checksum", msg)
            self.setStepStatus(stepName = stepName, status = 60451)

        return

    def checkForRunLumiInformation(self, stepName):
        """
        _checkForRunLumiInformation_

        Some steps require that all output files have run/lumi information.
        This will go through all output files in a step and make sure they
        have run/lumi information. If they don't, it creates an error with
        code 60452 for the step, failing it.
        """
        error = None
        files = self.getAllFilesFromStep(step = stepName)
        for f in files:
            if not f.get('runs', None):
                error = f.get('lfn', None)
            else:
                for run in f['runs']:
                    lumis = run.lumis
                    if not lumis:
                        error = f.get('lfn', None)
                        break

        if error:
            msg = '%s, file was %s' % (WMJobErrorCodes[60452], error)
            self.addError(stepName, 60452, "NoRunLumiInformation", msg)
            self.setStepStatus(stepName = stepName, status = 60452)

        return

    def checkForOutputFiles(self, stepName):
        """
        _checkForOutputFiles_

        Verify that there is at least one output file, either from
        analysis or from an output module.
        """
        files = self.getAllFilesFromStep(step = stepName)
        analysisFiles = self.getAnalysisFilesFromStep(step = stepName)
        if len(files) == 0 and len(analysisFiles) == 0:
            msg = WMJobErrorCodes[60450]
            self.addError(stepName, 60450, "NoOutput", msg)
            self.setStepStatus(stepName = stepName, status = 60450)
        return

    def stripInputFiles(self):
        """
        _stripInputFiles_

        If we need to compact the FWJR, the easiest way is just to trim
        the number of input files.
        """
        for stepName in self.data.steps:
            step = self.retrieveStep(stepName)
            inputSources = step.input.listSections_()
            for inputSource in inputSources:
                source = getattr(step.input, inputSource)
                for fileNum in range(source.files.fileCount):
                    delattr(source.files, "file%d" % fileNum)
                source.files.fileCount = 0
        return
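# A round-trip sketch (not from the source) for the persist()/unpersist()
# pair above; the file name and the input-file attributes are arbitrary
# illustrations.
from WMCore.FwkJobReport.Report import Report

rep = Report("cmsRun1")
rep.setTaskName("/SomeWorkflow/DataProcessing")
rep.addInputFile("source", lfn="/store/data/input.root", events=1000, size=2048)
rep.persist("Report.pkl")

fresh = Report()
fresh.unpersist("Report.pkl", reportname="cmsRun1")
print(fresh.getTaskName())                          # '/SomeWorkflow/DataProcessing'
print(len(fresh.getInputFilesFromStep("cmsRun1")))  # 1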
    def __init__(self, name = None):
        BaseConfigSection.__init__(self, name)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__MergeSample__

Example of a report from a merge job

Created on Fri Jun 8 13:22:30 2012

@author: dballest
"""

from WMCore.Configuration import ConfigSection
from WMCore.FwkJobReport.Report import Report

FrameworkJobReport = ConfigSection("FrameworkJobReport")
FrameworkJobReport.task = (
    "/Run195376-MuEG-Run2012B-PromptReco-v1-MuEG/DataProcessing/DataProcessingMergeSKIMStreamLogError"
)
FrameworkJobReport.workload = "Unknown"
FrameworkJobReport.section_("cmsRun1")
FrameworkJobReport.cmsRun1.status = 0
FrameworkJobReport.cmsRun1.counter = 1
FrameworkJobReport.cmsRun1.section_("cleanup")
FrameworkJobReport.cmsRun1.cleanup.section_("unremoved")
FrameworkJobReport.cmsRun1.cleanup.section_("removed")
FrameworkJobReport.cmsRun1.cleanup.removed.fileCount = 0
FrameworkJobReport.cmsRun1.section_("errors")
FrameworkJobReport.cmsRun1.section_("logs")
FrameworkJobReport.cmsRun1.section_("parameters")
FrameworkJobReport.cmsRun1.parameters.GeneratorInfo = ""
FrameworkJobReport.cmsRun1.parameters.ReadBranches = ""
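# A hedged sketch (not part of the sample): attaching the merge sample step to
# a Report instance and checking its status with the helpers defined above.
rep = Report()
rep.setStep("cmsRun1", FrameworkJobReport.cmsRun1)
print(rep.stepSuccessful("cmsRun1"))   # True, since status == 0
print(rep.taskSuccessful())            # True, no failed non-logArch steps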
    def testForwardSinkEntireChain(self):
        """
        The test chain looks as follows:
            worker -> Receiver1 (its Processor configured with a ForwardSink)
                   -> Receiver2 (the address the ForwardSink forwards to)
        Receiver2 then does FileSink so that the entire chain can be verified.
        """
        # configuration for the Receiver + Processor + ForwardSink 1 (group)
        config1 = Configuration()
        config1.component_("AlertProcessor")
        config1.AlertProcessor.section_("critical")
        config1.AlertProcessor.section_("soft")
        config1.AlertProcessor.critical.level = 5
        config1.AlertProcessor.soft.level = 0
        config1.AlertProcessor.soft.bufferSize = 0
        config1.AlertProcessor.critical.section_("sinks")
        config1.AlertProcessor.soft.section_("sinks")
        config1.AlertProcessor.critical.sinks.section_("forward")
        config1.AlertProcessor.soft.sinks.section_("forward")
        # address of the Receiver2
        config1.AlertProcessor.critical.sinks.forward.address = self.address2
        config1.AlertProcessor.critical.sinks.forward.controlAddr = self.controlAddr2
        config1.AlertProcessor.critical.sinks.forward.label = "ForwardSinkTest"
        config1.AlertProcessor.soft.sinks.forward.address = self.address2
        config1.AlertProcessor.soft.sinks.forward.controlAddr = self.controlAddr2
        config1.AlertProcessor.soft.sinks.forward.label = "ForwardSinkTest"

        # 1) the first item of the chain is the source of Alerts: worker()
        # 2) the second item is Receiver1 + its Processor + its ForwardSink
        processor1 = Processor(config1.AlertProcessor)
        # the ForwardSink will be created automatically by the Processor
        receiver1 = Receiver(self.address1, processor1, self.controlAddr1)
        receiver1.startReceiver()  # non-blocking call

        # 3) the third group is Receiver2 with its Processor and the final FileSink
        config2 = Configuration()
        config2.component_("AlertProcessor")
        config2.AlertProcessor.section_("critical")
        config2.AlertProcessor.section_("soft")
        config2.AlertProcessor.critical.level = 5
        config2.AlertProcessor.soft.level = 0
        config2.AlertProcessor.soft.bufferSize = 0
        config2.AlertProcessor.critical.section_("sinks")
        config2.AlertProcessor.soft.section_("sinks")
        config2.AlertProcessor.critical.sinks.section_("file")
        config2.AlertProcessor.soft.sinks.section_("file")
        # configuration of the final sink
        config2.AlertProcessor.critical.sinks.file.outputfile = self.outputfileCritical
        config2.AlertProcessor.soft.sinks.file.outputfile = self.outputfileSoft
        processor2 = Processor(config2.AlertProcessor)
        # the final FileSink will be created automatically by the Processor
        receiver2 = Receiver(self.address2, processor2, self.controlAddr2)
        receiver2.startReceiver()  # non-blocking call

        # now send the Alert messages via worker() and eventually shut receiver1
        worker(self.address1, self.controlAddr1, 10)
        # wait until receiver1 shuts
        while receiver1.isReady():
            time.sleep(0.4)
            print "%s waiting for Receiver1 to shut ..." % inspect.stack()[0][3]

        # shut down receiver2 - need to sendShutdown() to it
        s = Sender(self.address2, self.controlAddr2, "some_id")
        s.sendShutdown()
        # wait until receiver2 shuts
        while receiver2.isReady():
            time.sleep(0.4)
            print "%s waiting for Receiver2 to shut ..." % inspect.stack()[0][3]

        # Check the result in the files. The bufferSize for soft-level Alerts
        # was set to 0, so all Alerts should also be present in the soft-level
        # file. The initial 10 Alerts (levels 0 .. 9) get distributed through
        # a cascade of two Receivers: Alerts with levels 0 .. 4 are considered
        # soft, so Receiver1 forwards levels 0 .. 4 through its soft ForwardSink
        # and levels 5 .. 9 through the critical one. Order is not guaranteed.

        # critical Alerts
        fileConfig = ConfigSection("file")
        fileConfig.outputfile = self.outputfileCritical
        sink = FileSink(fileConfig)
        expectedLevels = range(5, 10)  # that is 5 .. 9
        loadAlerts = sink.load()
        self.assertEqual(len(loadAlerts), len(expectedLevels))
        d = dict(very = "interesting")
        for a in loadAlerts:
            self.assertEqual(a["Details"], d)

        # soft Alerts
        fileConfig = ConfigSection("file")
        fileConfig.outputfile = self.outputfileSoft
        sink = FileSink(fileConfig)
        expectedLevels = range(0, 5)  # that is 0 .. 4
        loadAlerts = sink.load()
        self.assertEqual(len(loadAlerts), len(expectedLevels))
        for a in loadAlerts:
            self.assertEqual(a["Details"], d)
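# A minimal standalone sketch of reading back a FileSink output file, mirroring
# the verification step of the test above. The import path for FileSink is an
# assumption based on WMCore's alert framework layout, and "alerts.json" is an
# assumed file name.
from WMCore.Configuration import ConfigSection
from WMCore.Alerts.ZMQ.Sinks.FileSink import FileSink  # assumed import path

fileConfig = ConfigSection("file")
fileConfig.outputfile = "alerts.json"
sink = FileSink(fileConfig)
for alert in sink.load():
    print(alert["Details"])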