def testF(self):
    """
    Test internal functions pythonise_, listSections_
    """
    config = ConfigSection("config")
    # Two top-level sections, two nested under SectionA.
    for topName in ("SectionA", "SectionB"):
        config.section_(topName)
    for childName in ("Section1", "Section2"):
        config.SectionA.section_(childName)
    config.SectionA.Section1.x = 100
    config.SectionA.Section1.y = 100

    # pythonise_ on the root must render both the section creation
    # and the nested attribute assignment.
    rendered = config.pythonise_()
    assert "config.section_('SectionA')" in rendered, "Pythonise failed: Could not find SectionA"
    assert "config.SectionA.Section1.x = 100" in rendered, "Pythonise failed: Could not find x"

    # pythonise_ on a subsection renders paths relative to that subsection.
    rendered = config.SectionA.pythonise_()
    assert "SectionA.section_('Section1')" in rendered, "Pythonise failed: Could not find Section1"
    assert "SectionA.Section1.x = 100" in rendered, "Pythonise failed: Could not find x"

    # listSections_ returns the child section names.
    self.assertEqual(config.listSections_(), ['SectionB', 'SectionA'])
    self.assertEqual(config.SectionA.listSections_(), ['Section2', 'Section1'])
def reportWorkflowToDashboard(self, dashboardActivity):
    """
    _reportWorkflowToDashboard_

    Gathers workflow information from the arguments and reports it to the
    dashboard.

    :param dashboardActivity: activity string stored as the workflow's
        'TaskType' entry.

    Best-effort: any failure is logged and swallowed, since dashboard
    reporting is not critical to the workflow itself.
    """
    try:
        # Create a fake config carrying just what DashboardReporter needs
        conf = ConfigSection()
        conf.section_('DashboardReporter')
        conf.DashboardReporter.dashboardHost = self.dashboardHost
        conf.DashboardReporter.dashboardPort = self.dashboardPort
        # Create the reporter
        reporter = DashboardReporter(conf)
        # Assemble the info
        workflow = {}
        workflow['name'] = self.workloadName
        workflow['application'] = self.frameworkVersion
        workflow['TaskType'] = dashboardActivity
        # Let's try to build information about the inputDataset
        dataset = 'DoesNotApply'
        if hasattr(self, 'inputDataset'):
            dataset = self.inputDataset
        workflow['datasetFull'] = dataset
        workflow['user'] = '******'
        # Send the workflow info
        reporter.addTask(workflow)
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. This is not critical: log the traceback
        # and leave it be.
        logging.exception("There was an error with dashboard reporting")
def testF(self):
    """
    Test internal functions pythonise_, listSections_
    """
    config = ConfigSection("config")
    config.section_("SectionA")
    config.section_("SectionB")
    config.SectionA.section_("Section1")
    config.SectionA.section_("Section2")
    # Attributes on a nested section should survive pythonisation.
    config.SectionA.Section1.x = 100
    config.SectionA.Section1.y = 100

    fullDump = config.pythonise_()
    assert "config.section_('SectionA')" in fullDump, "Pythonise failed: Could not find SectionA"
    assert "config.SectionA.Section1.x = 100" in fullDump, "Pythonise failed: Could not find x"

    partialDump = config.SectionA.pythonise_()
    assert "SectionA.section_('Section1')" in partialDump, "Pythonise failed: Could not find Section1"
    assert "SectionA.Section1.x = 100" in partialDump, "Pythonise failed: Could not find x"

    # Child-section listings for the root and for SectionA.
    self.assertEqual(config.listSections_(), ['SectionB', 'SectionA'])
    self.assertEqual(config.SectionA.listSections_(), ['Section2', 'Section1'])
def reportWorkflowToDashboard(self, dashboardActivity):
    """
    _reportWorkflowToDashboard_

    Gathers workflow information from the arguments and reports it to the
    dashboard.

    :param dashboardActivity: activity string stored as the workflow's
        'TaskType' entry.

    Best-effort: any failure is logged and swallowed, since dashboard
    reporting is not critical to the workflow itself.
    """
    try:
        # Create a fake config carrying just what DashboardReporter needs
        conf = ConfigSection()
        conf.section_('DashboardReporter')
        conf.DashboardReporter.dashboardHost = self.dashboardHost
        conf.DashboardReporter.dashboardPort = self.dashboardPort
        # Create the reporter
        reporter = DashboardReporter(conf)
        # Assemble the info
        workflow = {}
        workflow['name'] = self.workloadName
        workflow['application'] = self.frameworkVersion
        workflow['TaskType'] = dashboardActivity
        # Let's try to build information about the inputDataset
        dataset = 'DoesNotApply'
        if hasattr(self, 'inputDataset'):
            dataset = self.inputDataset
        workflow['datasetFull'] = dataset
        workflow['user'] = '******'
        # Send the workflow info
        reporter.addTask(workflow)
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. This is not critical: log the traceback
        # and leave it be.
        logging.exception("There was an error with dashboard reporting")
def testH_ConfigSectionDictionariseInternalChildren(self):
    """
    Check that dictionary_whole_tree_() fully expands nested sections:
    no item in its result may remain an unexpanded ConfigSection.
    """
    config = ConfigSection("config")
    config.value1 = "MyValue1"
    config.section_("Task1")
    config.Task1.value2 = "MyValue2"
    config.Task1.section_("subSection")
    config.Task1.subSection.value3 = "MyValue3"

    tree = config.dictionary_whole_tree_()
    # Every top-level entry must have been converted to a plain value/dict.
    for item in tree.values():
        self.assertFalse(isinstance(item, ConfigSection))
    # The deepest attribute must be reachable through nested dicts.
    self.assertEqual(tree["Task1"]["subSection"]["value3"], "MyValue3")
def testH_ConfigSectionDictionariseInternalChildren(self):
    """
    Verify dictionary_whole_tree_() expands all children: none of the
    resulting values may still be a ConfigSection instance.
    """
    config = ConfigSection("config")
    config.value1 = "MyValue1"
    config.section_("Task1")
    config.Task1.value2 = "MyValue2"
    config.Task1.section_("subSection")
    config.Task1.subSection.value3 = "MyValue3"

    dictTree = config.dictionary_whole_tree_()
    for entry in dictTree.values():
        # Sections must have been dictionarised, not passed through raw.
        self.assertFalse(isinstance(entry, ConfigSection))
    self.assertEqual(dictTree["Task1"]["subSection"]["value3"], "MyValue3")
def reportWorkflowToDashboard(self, dashboardActivity):
    """
    _reportWorkflowToDashboard_

    Gathers workflow information from the arguments and reports it to the
    dashboard.

    :param dashboardActivity: activity string stored as the workflow's
        'TaskType' entry.

    Best-effort: any failure is logged and swallowed, since dashboard
    reporting is not critical to the workflow itself.
    """
    try:
        # Create a fake config carrying just what DashboardReporter needs
        conf = ConfigSection()
        conf.section_("DashboardReporter")
        conf.DashboardReporter.dashboardHost = self.dashboardHost
        conf.DashboardReporter.dashboardPort = self.dashboardPort
        # Create the reporter
        reporter = DashboardReporter(conf)
        # Assemble the info
        workflow = {}
        workflow["name"] = self.workloadName
        workflow["application"] = self.frameworkVersion
        workflow["scheduler"] = "BossAir"
        workflow["TaskType"] = dashboardActivity
        # Let's try to build information about the inputDataset
        dataset = "DoesNotApply"
        if hasattr(self, "inputDataset"):
            dataset = self.inputDataset
        workflow["datasetFull"] = dataset
        workflow["user"] = "******"
        # These two are not reported for now
        workflow["GridName"] = "NotAvailable"
        workflow["nevtJob"] = "NotAvailable"
        # Send the workflow info
        reporter.addTask(workflow)
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. This is not critical: log the traceback
        # and leave it be.
        logging.exception("There was an error with dashboard reporting")
class Report: """ The base class for the new jobReport """ def __init__(self, reportname = None): self.data = ConfigSection("FrameworkJobReport") self.data.steps = [] self.data.workload = "Unknown" if reportname: self.addStep(reportname = reportname) return def __str__(self): return str(self.data) def listSteps(self): """ _listSteps_ List the names of all the steps in the report. """ return self.data.steps def setStepStatus(self, stepName, status): """ _setStepStatus_ Set the status for a step. """ reportStep = self.retrieveStep(stepName) reportStep.status = status return def parse(self, xmlfile, stepName = "cmsRun1"): """ _parse_ Read in the FrameworkJobReport XML file produced by cmsRun and pull the information from it into this object """ from WMCore.FwkJobReport.XMLParser import xmlToJobReport try: xmlToJobReport(self, xmlfile) except Exception as ex: msg = "Error reading XML job report file, possibly corrupt XML File:\n" msg += "Details: %s" % str(ex) crashMessage = "\nStacktrace:\n" stackTrace = traceback.format_tb(sys.exc_info()[2], None) for stackFrame in stackTrace: crashMessage += stackFrame self.addError(stepName, 50115, "BadFWJRXML", msg) raise FwkJobReportException(msg) def jsonizeFiles(self, reportModule): """ _jsonizeFiles_ Put individual files in JSON format. """ jsonFiles = [] files = getattr(reportModule, "files", None) if not files: return jsonFiles fileCount = getattr(reportModule.files, "fileCount", 0) for i in range(fileCount): reportFile = getattr(reportModule.files, "file%s" % i) jsonFile = reportFile.dictionary_() if jsonFile.get('runs', None): cfgSectionRuns = jsonFile["runs"] jsonFile["runs"] = {} for runNumber in cfgSectionRuns.listSections_(): jsonFile["runs"][str(runNumber)] = getattr(cfgSectionRuns, runNumber) jsonFiles.append(jsonFile) return jsonFiles def jsonizePerformance(self, perfSection): """ _jsonizePerformance_ Convert the performance section of the FWJR into JSON. 
""" jsonPerformance = {} for reportSection in ["storage", "memory", "cpu", "multicore"]: jsonPerformance[reportSection] = {} if not hasattr(perfSection, reportSection): continue jsonPerformance[reportSection] = getattr(perfSection, reportSection).dictionary_() for key in jsonPerformance[reportSection].keys(): val = jsonPerformance[reportSection][key] if type(val) == types.FloatType: if math.isinf(val) or math.isnan(val): jsonPerformance[reportSection][key] = None return jsonPerformance def __to_json__(self, thunker): """ __to_json__ Create a JSON version of the Report. """ jsonReport = {} jsonReport["task"] = self.getTaskName() jsonReport["steps"] = {} jsonReport["skippedFiles"] = self.getAllSkippedFiles() jsonReport["fallbackFiles"] = self.getAllFallbackFiles() for stepName in self.listSteps(): reportStep = self.retrieveStep(stepName) jsonStep = {} jsonStep["status"] = reportStep.status stepTimes = self.getTimes(stepName) if stepTimes["startTime"] != None: stepTimes["startTime"] = int(stepTimes["startTime"]) if stepTimes["stopTime"] != None: stepTimes["stopTime"] = int(stepTimes["stopTime"]) jsonStep["start"] = stepTimes["startTime"] jsonStep["stop"] = stepTimes["stopTime"] jsonStep["performance"] = self.jsonizePerformance(reportStep.performance) jsonStep["output"] = {} for outputModule in reportStep.outputModules: reportOutputModule = getattr(reportStep.output, outputModule) jsonStep["output"][outputModule] = self.jsonizeFiles(reportOutputModule) analysisSection = getattr(reportStep, 'analysis', None) if analysisSection: jsonStep["output"]['analysis'] = self.jsonizeFiles(analysisSection) jsonStep["input"] = {} for inputSource in reportStep.input.listSections_(): reportInputSource = getattr(reportStep.input, inputSource) jsonStep["input"][inputSource] = self.jsonizeFiles(reportInputSource) jsonStep["errors"] = [] errorCount = getattr(reportStep.errors, "errorCount", 0) for i in range(errorCount): reportError = getattr(reportStep.errors, "error%i" % i) 
jsonStep["errors"].append({"type": reportError.type, "details": reportError.details, "exitCode": reportError.exitCode}) jsonStep["cleanup"] = {} jsonStep["parameters"] = {} jsonStep["site"] = self.getSiteName() jsonStep["analysis"] = {} jsonStep["logs"] = {} jsonReport["steps"][stepName] = jsonStep return jsonReport def getSiteName(self): """ _getSiteName_ Returns the site name attribute (no step specific) """ return getattr(self.data, 'siteName', {}) def getExitCodes(self): """ _getExitCodes_ Return a list of all non-zero exit codes in the report """ returnCodes = set() for stepName in self.listSteps(): returnCodes.update(self.getStepExitCodes(stepName = stepName)) return returnCodes def getStepExitCodes(self, stepName): """ _getStepExitCodes_ Returns a list of all non-zero exit codes in the step """ returnCodes = set() reportStep = self.retrieveStep(stepName) errorCount = getattr(reportStep.errors, "errorCount", 0) for i in range(errorCount): reportError = getattr(reportStep.errors, "error%i" % i) if getattr(reportError, 'exitCode', None): returnCodes.add(int(reportError.exitCode)) return returnCodes def getExitCode(self): """ _getExitCode_ Return the first exit code you find. """ returnCode = 0 for stepName in self.listSteps(): errorCode = self.getStepExitCode(stepName = stepName) if errorCode == 99999: # Then we don't know what this error was # Mark it for return only if we don't fine an # actual error code in the job. 
returnCode = errorCode elif errorCode != 0: return errorCode return returnCode def getStepExitCode(self, stepName): """ _getStepExitCode_ Get the exit code for a particular step Return 0 if none """ returnCode = 0 reportStep = self.retrieveStep(stepName) errorCount = getattr(reportStep.errors, "errorCount", 0) for i in range(errorCount): reportError = getattr(reportStep.errors, "error%i" % i) if not getattr(reportError, 'exitCode', None): returnCode = 99999 else: return int(reportError.exitCode) return returnCode def persist(self, filename): """ _persist_ Pickle this object and save it to disk. """ handle = open(filename, 'w') cPickle.dump(self.data, handle) handle.close() return def unpersist(self, filename, reportname = None): """ _unpersist_ Load a pickled FWJR from disk. """ handle = open(filename, 'r') self.data = cPickle.load(handle) handle.close() # old self.report (if it existed) became unattached if reportname: self.report = getattr(self.data, reportname) return def addOutputModule(self, moduleName): """ _addOutputModule_ Add an entry for an output module. """ self.report.outputModules.append(moduleName) self.report.output.section_(moduleName) outMod = getattr(self.report.output, moduleName) outMod.section_("files") outMod.section_("dataset") outMod.files.fileCount = 0 return outMod def killOutput(self): """ _killOutput_ Remove all the output from the report. This is useful for chained processing where we don't want to keep the output from a particular step in a job. """ for outputModuleName in self.report.outputModules: delattr(self.report.output, outputModuleName) self.report.outputModules = [] return def addOutputFile(self, outputModule, file = {}): """ _addFile_ Add an output file to the outputModule provided. 
""" if not checkFileForCompletion(file): # Then the file is not complete, and should not be added print "ERROR" return None # Now load the output module and create the file object outMod = getattr(self.report.output, outputModule, None) if outMod == None: outMod = self.addOutputModule(outputModule) count = outMod.files.fileCount fileSection = "file%s" % count outMod.files.section_(fileSection) fileRef = getattr(outMod.files, fileSection) outMod.files.fileCount += 1 # Now we need to eliminate the optional and non-primitives: # runs, parents, branches, locations and datasets keyList = file.keys() fileRef.section_("runs") if "runs" in file: for run in file["runs"]: addRunInfoToFile(fileRef, run) keyList.remove('runs') if "parents" in file: setattr(fileRef, 'parents', list(file['parents'])) keyList.remove('parents') if "locations" in file: fileRef.location = list(file["locations"]) keyList.remove('locations') elif "SEName" in file: fileRef.location = [file["SEName"]] if "LFN" in file: fileRef.lfn = file["LFN"] keyList.remove("LFN") if "PFN" in file: fileRef.lfn = file["PFN"] keyList.remove("PFN") # All right, the rest should be JSONalizable python primitives for entry in keyList: setattr(fileRef, entry, file[entry]) #And we're done return fileRef def addInputSource(self, sourceName): """ _addInputSource_ Add an input source to the report doing nothing if the input source already exists. """ if hasattr(self.report.input, sourceName): return getattr(self.report.input, sourceName) self.report.input.section_(sourceName) srcMod = getattr(self.report.input, sourceName) srcMod.section_("files") srcMod.files.fileCount = 0 return srcMod def addInputFile(self, sourceName, **attrs): """ _addInputFile_ Add an input file to the given source. 
""" srcMod = getattr(self.report.input, sourceName, None) if srcMod == None: srcMod = self.addInputSource(sourceName) count = srcMod.files.fileCount fileSection = "file%s" % count srcMod.files.section_(fileSection) fileRef = getattr(srcMod.files, fileSection) srcMod.files.fileCount += 1 keyList = attrs.keys() fileRef.section_("runs") if "runs" in attrs: for run in attrs["runs"]: addRunInfoToFile(fileRef, run) keyList.remove('runs') if "parents" in attrs: keyList.remove('parents') if "locations" in attrs: keyList.remove('locations') # All right, the rest should be JSONalizable python primitives for entry in keyList: setattr(fileRef, entry, attrs[entry]) return fileRef def addAnalysisFile(self, filename, **attrs): """ _addAnalysisFile_ Add an Analysis File. """ analysisFiles = self.report.analysis.files count = analysisFiles.fileCount label = "file%s" % count analysisFiles.section_(label) newFile = getattr(analysisFiles, label) newFile.fileName = filename [ setattr(newFile, x, y) for x, y in attrs.items() ] analysisFiles.fileCount += 1 return def addRemovedCleanupFile(self, **attrs): """ _addRemovedCleanupFile_ Add a file to the cleanup.removed file """ removedFiles = self.report.cleanup.removed count = self.report.cleanup.removed.fileCount label = 'file%s' % count removedFiles.section_(label) newFile = getattr(removedFiles, label) [ setattr(newFile, x, y) for x, y in attrs.items() ] self.report.cleanup.removed.fileCount += 1 return def addError(self, stepName, exitCode, errorType, errorDetails): """ _addError_ Add an error report with an exitCode, type/class of error and details of the error as a string """ if self.retrieveStep(stepName) == None: # Create a step and set it to failed # Assumption: Adding an error fails a step self.addStep(stepName, status = 1) stepSection = self.retrieveStep(stepName) errorCount = getattr(stepSection.errors, "errorCount", 0) errEntry = "error%s" % errorCount stepSection.errors.section_(errEntry) errDetails = 
getattr(stepSection.errors, errEntry) errDetails.exitCode = exitCode errDetails.type = str(errorType) errDetails.details = errorDetails setattr(stepSection.errors, "errorCount", errorCount +1) return def addSkippedFile(self, lfn, pfn): """ _addSkippedFile_ Report a skipped input file """ count = self.report.skipped.files.fileCount entry = "file%s" % count self.report.skipped.files.section_(entry) skipSect = getattr(self.report.skipped.files, entry) skipSect.PhysicalFileName = pfn skipSect.LogicalFileName = lfn self.report.skipped.files.fileCount += 1 return def addFallbackFile(self, lfn, pfn): """ _addFallbackFile_ Report a fallback attempt for input file """ count = self.report.fallback.files.fileCount entry = "file%s" % count self.report.fallback.files.section_(entry) fallbackSect = getattr(self.report.fallback.files, entry) fallbackSect.PhysicalFileName = pfn fallbackSect.LogicalFileName = lfn self.report.fallback.files.fileCount += 1 return def addSkippedEvent(self, run, event): """ _addSkippedEvent_ Add a skipped event. 
""" self.report.skipped.events.section_(str(run)) runsect = getattr(self.report.skipped.events, str(run)) if not hasattr(runsect, "eventList"): runsect.eventList = [] runsect.eventList.append(event) return def addStep(self, reportname, status = 1): """ _addStep_ This creates a report section into self.report """ if hasattr(self.data, reportname): msg = "Attempted to create pre-existing report section %s" % (reportname) logging.error(msg) return self.data.steps.append(reportname) self.reportname = reportname self.data.section_(reportname) self.report = getattr(self.data, reportname) self.report.id = None self.report.status = status self.report.outputModules = [] # structure self.report.section_("site") self.report.section_("output") self.report.section_("input") self.report.section_("performance") self.report.section_("analysis") self.report.section_("errors") self.report.section_("skipped") self.report.section_("fallback") self.report.section_("parameters") self.report.section_("logs") self.report.section_("cleanup") self.report.analysis.section_("files") self.report.cleanup.section_("removed") self.report.cleanup.section_("unremoved") self.report.skipped.section_("events") self.report.skipped.section_("files") self.report.fallback.section_("files") self.report.skipped.files.fileCount = 0 self.report.fallback.files.fileCount = 0 self.report.analysis.files.fileCount = 0 self.report.cleanup.removed.fileCount = 0 return def setStep(self, stepName, stepSection): """ _setStep_ """ if not stepName in self.data.steps: self.data.steps.append(stepName) else: logging.info("Step %s is now being overridden by a new step report" % stepName) self.data.section_(stepName) setattr(self.data, stepName, stepSection) return def retrieveStep(self, step): """ _retrieveStep_ Grabs a report in the raw and returns it. 
""" reportSection = getattr(self.data, step, None) return reportSection def load(self, filename): """ _load_ This just maps to unpersist """ self.unpersist(filename) return def save(self, filename): """ _save_ This just maps to persist """ self.persist(filename) return def getOutputModule(self, step, outputModule): """ _getOutputModule_ Get the output module from a particular step """ stepReport = self.retrieveStep(step = step) if not stepReport: return None return getattr(stepReport.output, outputModule, None) def getOutputFile(self, fileName, outputModule, step): """ _getOutputFile_ Takes a fileRef object and returns a DataStructs/File object as output """ outputMod = self.getOutputModule(step = step, outputModule = outputModule) if not outputMod: return None fileRef = getattr(outputMod.files, fileName, None) newFile = File(locations = set()) #Locations newFile.setLocation(getattr(fileRef, "location", None)) #Runs runList = fileRef.runs.listSections_() for run in runList: lumis = getattr(fileRef.runs, run) newRun = Run(int(run), *lumis) newFile.addRun(newRun) newFile["lfn"] = getattr(fileRef, "lfn", None) newFile["pfn"] = getattr(fileRef, "pfn", None) newFile["events"] = int(getattr(fileRef, "events", 0)) newFile["size"] = int(getattr(fileRef, "size", 0)) newFile["branches"] = getattr(fileRef, "branches", []) newFile["input"] = getattr(fileRef, "input", []) newFile["inputpfns"] = getattr(fileRef, "inputpfns", []) newFile["branch_hash"] = getattr(fileRef, "branch_hash", None) newFile["catalog"] = getattr(fileRef, "catalog", "") newFile["guid"] = getattr(fileRef, "guid", "") newFile["module_label"] = getattr(fileRef, "module_label", "") newFile["checksums"] = getattr(fileRef, "checksums", {}) newFile["merged"] = bool(getattr(fileRef, "merged", False)) newFile["dataset"] = getattr(fileRef, "dataset", {}) newFile["acquisitionEra"] = getattr(fileRef, 'acquisitionEra', None) newFile["processingVer"] = getattr(fileRef, 'processingVer', None) newFile["validStatus"] = 
getattr(fileRef, 'validStatus', None) newFile["globalTag"] = getattr(fileRef, 'globalTag', None) newFile["prep_id"] = getattr(fileRef, 'prep_id', None) newFile['configURL'] = getattr(fileRef, 'configURL', None) newFile['inputPath'] = getattr(fileRef, 'inputPath', None) newFile["outputModule"] = outputModule newFile["fileRef"] = fileRef return newFile def getAllFilesFromStep(self, step): """ _getAllFilesFromStep_ For a given step, retrieve all the associated files """ stepReport = self.retrieveStep(step = step) if not stepReport: logging.debug("Asked to retrieve files from non-existant step %s" % step) return [] listOfModules = getattr(stepReport, 'outputModules', None) if not listOfModules: logging.debug("Asked to retrieve files from step %s with no outputModules" % step) logging.debug("StepReport: %s" % stepReport) return [] listOfFiles = [] for module in listOfModules: tmpList = self.getFilesFromOutputModule(step = step, outputModule = module) if not tmpList: continue listOfFiles.extend(tmpList) return listOfFiles def getAllFiles(self): """ _getAllFiles_ Grabs all files in all output modules in all steps """ listOfFiles = [] for step in self.data.steps: tmp = self.getAllFilesFromStep(step = step) if tmp: listOfFiles.extend(tmp) return listOfFiles def getAllInputFiles(self): """ _getAllInputFiles_ Gets all the input files """ listOfFiles = [] for step in self.data.steps: tmp = self.getInputFilesFromStep(stepName = step) if tmp: listOfFiles.extend(tmp) return listOfFiles def getInputFilesFromStep(self, stepName, inputSource = None): """ _getInputFilesFromStep_ Retrieve a list of input files from the given step. 
""" step = self.retrieveStep(stepName) inputSources = [] if inputSource == None: inputSources = step.input.listSections_() else: inputSources = [inputSource] inputFiles = [] for inputSource in inputSources: source = getattr(step.input, inputSource) for fileNum in range(source.files.fileCount): fwjrFile = getattr(source.files, "file%d" % fileNum) lfn = getattr(fwjrFile, "lfn", None) pfn = getattr(fwjrFile, "pfn", None) size = getattr(fwjrFile, "size", 0) events = getattr(fwjrFile, "events", 0) branches = getattr(fwjrFile, "branches", []) catalog = getattr(fwjrFile, "catalog", None) guid = getattr(fwjrFile, "guid", None) inputSourceClass = getattr(fwjrFile, "input_source_class", None) moduleLabel = getattr(fwjrFile, "module_label", None) inputType = getattr(fwjrFile, "input_type", None) inputFile = File(lfn = lfn, size = size, events = events) inputFile["pfn"] = pfn inputFile["branches"] = branches inputFile["catalog"] = catalog inputFile["guid"] = guid inputFile["input_source_class"] = inputSourceClass inputFile["module_label"] = moduleLabel inputFile["input_type"] = inputType runSection = getattr(fwjrFile, "runs") runNumbers = runSection.listSections_() for runNumber in runNumbers: lumiTuple = getattr(runSection, str(runNumber)) inputFile.addRun(Run(int(runNumber), *lumiTuple)) inputFiles.append(inputFile) return inputFiles def getFilesFromOutputModule(self, step, outputModule): """ _getFilesFromOutputModule_ Grab all the files in a particular output module """ listOfFiles = [] outputMod = self.getOutputModule(step = step, outputModule = outputModule) if not outputMod: return None for n in range(outputMod.files.fileCount): file = self.getOutputFile(fileName = 'file%i' %(n), outputModule = outputModule, step = step) if not file: msg = "Could not find file%i in module" % (n) logging.error(msg) return None #Now, append to the list of files listOfFiles.append(file) return listOfFiles def getAllSkippedFiles(self): """ _getAllSkippedFiles_ Get a list of LFNs for all the 
input files listed as skipped on the report. """ listOfFiles = [] for step in self.data.steps: tmp = self.getSkippedFilesFromStep(stepName = step) if tmp: listOfFiles.extend(tmp) return listOfFiles def getAllFallbackFiles(self): """ _getAllFallbackFiles_ Get a list of LFNs for all the input files listed as fallback attempt on the report """ listOfFiles = [] for step in self.data.steps: tmp = self.getFallbackFilesFromStep(stepName = step) if tmp: listOfFiles.extend(tmp) return listOfFiles def getSkippedFilesFromStep(self, stepName): """ _getSkippedFilesFromStep_ Get a list of LFNs skipped in the given step """ skippedFiles = [] step = self.retrieveStep(stepName) filesSection = step.skipped.files fileCount = getattr(filesSection, "fileCount", 0) for fileNum in range(fileCount): fileSection = getattr(filesSection, "file%d" % fileNum) lfn = getattr(fileSection, "LogicalFileName", None) if lfn is not None: skippedFiles.append(lfn) else: logging.error("Found no LFN in file %s" % str(fileSection)) return skippedFiles def getFallbackFilesFromStep(self, stepName): """ _getFallbackFilesFromStep_ Get a list of LFNs which triggered a fallback in the given step """ fallbackFiles = [] step = self.retrieveStep(stepName) try: filesSection = step.fallback.files except AttributeError: return fallbackFiles fileCount = getattr(filesSection, "fileCount", 0) for fileNum in range(fileCount): fileSection = getattr(filesSection, "file%d" % fileNum) lfn = getattr(fileSection, "LogicalFileName", None) if lfn is not None: fallbackFiles.append(lfn) else: logging.error("Found no LFN in file %s" % str(fileSection)) return fallbackFiles def getStepErrors(self, stepName): """ _getStepErrors_ Get all errors for a given step """ if self.retrieveStep(stepName) == None: # Create a step and set it to failed # Assumption: Adding an error fails a step self.addStep(stepName, status = 1) stepSection = self.retrieveStep(stepName) errorCount = getattr(stepSection.errors, "errorCount", 0) if errorCount == 0: 
return {} else: return stepSection.errors.dictionary_() def stepSuccessful(self, stepName): """ _stepSuccessful_ Determine wether or not a step was successful. """ stepReport = self.retrieveStep(step = stepName) status = getattr(stepReport, 'status', 1) # We have too many possibilities if status not in [0, '0', 'success', 'Success']: return False return True def taskSuccessful(self, ignoreString = 'logArch'): """ _taskSuccessful_ Return True if all steps successful, False otherwise """ value = True if len(self.data.steps) == 0: # Mark jobs as failed if they have no steps msg = "Could not find any steps" logging.error(msg) return False for stepName in self.data.steps: # Ignore specified steps # i.e., logArch steps can fail without causing # the task to fail if ignoreString and re.search(ignoreString, stepName): continue if not self.stepSuccessful(stepName = stepName): value = False return value def getAnalysisFilesFromStep(self, step): """ _getAnalysisFilesFromStep_ Retrieve a list of all the analysis files produced in a step. """ stepReport = self.retrieveStep(step=step) if not stepReport or not hasattr(stepReport.analysis, 'files'): return [] analysisFiles = stepReport.analysis.files results = [] for fileNum in range(analysisFiles.fileCount): results.append(getattr(analysisFiles, "file%s" % fileNum)) # filter out duplicates duplicateCheck = [] filteredResults = [] for result in results: inputtag = getattr(result, 'inputtag', None) if (result.fileName, inputtag) not in duplicateCheck: duplicateCheck.append((result.fileName, inputtag)) filteredResults.append(result) return filteredResults def getAllFileRefsFromStep(self, step): """ _getAllFileRefsFromStep_ Retrieve a list of all files produced in a step. The files will be in the form of references to the ConfigSection objects in the acutal report. 
""" stepReport = self.retrieveStep(step = step) if not stepReport: return [] outputModules = getattr(stepReport, "outputModules", []) fileRefs = [] for outputModule in outputModules: outputModuleRef = self.getOutputModule(step = step, outputModule = outputModule) for i in range(outputModuleRef.files.fileCount): fileRefs.append(getattr(outputModuleRef.files, "file%i" % i)) analysisFiles = self.getAnalysisFilesFromStep(step) fileRefs.extend(analysisFiles) return fileRefs def addInfoToOutputFilesForStep(self, stepName, step): """ _addInfoToOutputFilesForStep_ Add the information missing from output files to the files This requires the WMStep to be passed in """ stepReport = self.retrieveStep(step = stepName) fileInfo = FileInfo() if not stepReport: return None listOfModules = getattr(stepReport, 'outputModules', None) for module in listOfModules: outputMod = getattr(stepReport.output, module, None) for n in range(outputMod.files.fileCount): file = getattr(outputMod.files, 'file%i' %(n), None) if not file: msg = "Could not find file%i in module" % (n) logging.error(msg) return None fileInfo(fileReport = file, step = step, outputModule = module) return def deleteOutputModuleForStep(self, stepName, moduleName): """ _deleteOutputModuleForStep_ Delete any reference to the given output module in the step report that includes deleting any output file it produced """ stepReport = self.retrieveStep(step = stepName) if not stepReport: return listOfModules = getattr(stepReport, 'outputModules', []) if moduleName not in listOfModules: return delattr(stepReport.output, moduleName) listOfModules.remove(moduleName) return def setStepStartTime(self, stepName): """ _setStepStatus_ Set the startTime for a step. """ reportStep = self.retrieveStep(stepName) reportStep.startTime = time.time() return def setStepStopTime(self, stepName): """ _setStepStatus_ Set the stopTime for a step. 
""" reportStep = self.retrieveStep(stepName) reportStep.stopTime = time.time() return def getTimes(self, stepName): """ _getTimes_ Return a dictionary with the start and stop times """ reportStep = self.retrieveStep(stepName) startTime = getattr(reportStep, 'startTime', None) stopTime = getattr(reportStep, 'stopTime', None) return {'startTime': startTime, 'stopTime': stopTime} def getFirstStartLastStop(self): """ _getFirstStartLastStop_ Get the first startTime, last stopTime """ steps = self.listSteps() if len(steps) < 1: return None firstStep = self.getTimes(stepName = steps[0]) startTime = firstStep['startTime'] stopTime = firstStep['stopTime'] for stepName in steps: timeStamps = self.getTimes(stepName = stepName) if timeStamps['startTime'] is None or timeStamps['stopTime'] is None: # Unusable times continue if startTime is None or startTime > timeStamps['startTime']: startTime = timeStamps['startTime'] if stopTime is None or stopTime < timeStamps['stopTime']: stopTime = timeStamps['stopTime'] return {'startTime': startTime, 'stopTime': stopTime} def setTaskName(self, taskName): """ _setTaskName_ Set the task name for the report """ self.data.task = taskName return def getTaskName(self): """ _getTaskName_ Return the task name """ return getattr(self.data, 'task', None) def setJobID(self, jobID): """ _setJobID_ Set the WMBS jobID """ self.data.jobID = jobID return def getJobID(self): """ _getJobID_ Get the WMBS job ID if attached """ return getattr(self.data, 'jobID', None) def getAllFileRefs(self): """ _getAllFileRefs_ Get references for all file in the step """ fileRefs = [] for step in self.data.steps: tmpRefs = self.getAllFileRefsFromStep(step = step) if len(tmpRefs) > 0: fileRefs.extend(tmpRefs) return fileRefs def setAcquisitionProcessing(self, acquisitionEra, processingVer, processingStr = None): """ _setAcquisitionProcessing_ Set the acquisition and processing era for every output file ONLY run this after all files have been accumulated; it doesn't set 
things for future files. """ fileRefs = self.getAllFileRefs() # Should now have all the fileRefs for f in fileRefs: f.acquisitionEra = acquisitionEra f.processingVer = processingVer f.processingStr = processingStr return def setValidStatus(self, validStatus): """ _setValidStatus_ Set the validStatus for all steps and all files. ONLY run this after all files have been attached. """ fileRefs = self.getAllFileRefs() # Should now have all the fileRefs for f in fileRefs: f.validStatus = validStatus return def setGlobalTag(self, globalTag): """ _setGlobalTag_ Set the global Tag from the spec on the WN ONLY run this after all the files have been attached """ fileRefs = self.getAllFileRefs() # Should now have all the fileRefs for f in fileRefs: f.globalTag = globalTag return def setPrepID(self, prep_id): """ _setGlobalTag_ Set the global Tag from the spec on the WN ONLY run this after all the files have been attached """ fileRefs = self.getAllFileRefs() # Should now have all the fileRefs for f in fileRefs: f.prep_id = prep_id return def setConfigURL(self, configURL): """ _setConfigURL_ Set the config URL in a portable storage form """ fileRefs = self.getAllFileRefs() # Should now have all the fileRefs for f in fileRefs: f.configURL = configURL return def setInputDataset(self, inputPath): """ _setInputDataset_ Set the input dataset path for the task in each file """ fileRefs = self.getAllFileRefs() # Should now have all the fileRefs for f in fileRefs: f.inputPath = inputPath return def setStepRSS(self, stepName, min, max, average): """ _setStepRSS_ Set the Performance RSS information """ reportStep = self.retrieveStep(stepName) reportStep.performance.section_('RSSMemory') reportStep.performance.RSSMemory.min = min reportStep.performance.RSSMemory.max = max reportStep.performance.RSSMemory.average = average return def setStepPMEM(self, stepName, min, max, average): """ _setStepPMEM_ Set the Performance PMEM information """ reportStep = self.retrieveStep(stepName) 
reportStep.performance.section_('PhysicalMemory') reportStep.performance.PhysicalMemory.min = min reportStep.performance.PhysicalMemory.max = max reportStep.performance.PhysicalMemory.average = average return def setStepPCPU(self, stepName, min, max, average): """ _setStepPCPU_ Set the Performance PCPU information """ reportStep = self.retrieveStep(stepName) reportStep.performance.section_('PercentCPU') reportStep.performance.PercentCPU.min = min reportStep.performance.PercentCPU.max = max reportStep.performance.PercentCPU.average = average return def setStepVSize(self, stepName, min, max, average): """ _setStepVSize_ Set the Performance PCPU information """ reportStep = self.retrieveStep(stepName) reportStep.performance.section_('VSizeMemory') reportStep.performance.VSizeMemory.min = min reportStep.performance.VSizeMemory.max = max reportStep.performance.VSizeMemory.average = average return def setStepCounter(self, stepName, counter): """ _setStepCounter_ Assign a number to the step """ reportStep = self.retrieveStep(stepName) reportStep.counter = counter return def checkForAdlerChecksum(self, stepName): """ _checkForAdlerChecksum_ Some steps require that all output files have adler checksums This will go through all output files in a step and make sure they have an adler32 checksum. If they don't it creates an error with code 60451 for the step, failing it. """ error = None files = self.getAllFilesFromStep(step = stepName) for f in files: if not 'adler32' in f.get('checksums', {}).keys(): error = f.get('lfn', None) elif f['checksums']['adler32'] == None: error = f.get('lfn', None) if error: msg = '%s, file was %s' % (WMJobErrorCodes[60451], error) self.addError(stepName, 60451, "NoAdler32Checksum", msg) self.setStepStatus(stepName = stepName, status = 60451) return def checkForRunLumiInformation(self, stepName): """ _checkForRunLumiInformation_ Some steps require that all output files have run lumi information. 
        This will go through all output files in a step and make sure they
        have run/lumi information.  If they don't it creates an error with
        code 60452 for the step, failing it.
        """
        error = None
        files = self.getAllFilesFromStep(step = stepName)
        for f in files:
            # A file with no runs at all is an error; otherwise every run
            # must carry at least one lumi section.
            if not f.get('runs', None):
                error = f.get('lfn', None)
            else:
                for run in f['runs']:
                    lumis = run.lumis
                    if not lumis:
                        error = f.get('lfn', None)
                        break
        if error:
            msg = '%s, file was %s' % (WMJobErrorCodes[60452], error)
            self.addError(stepName, 60452, "NoRunLumiInformation", msg)
            self.setStepStatus(stepName = stepName, status = 60452)
        return

    def checkForOutputFiles(self, stepName):
        """
        _checkForOutputFiles_

        Verify that there is at least an output file, either from
        analysis or from an output module.  If neither produced anything,
        fail the step with error code 60450.
        """
        files = self.getAllFilesFromStep(step = stepName)
        analysisFiles = self.getAnalysisFilesFromStep(step = stepName)
        if not (len(files) > 0 or len(analysisFiles) > 0):
            msg = WMJobErrorCodes[60450]
            self.addError(stepName, 60450, "NoOutput", msg)
            self.setStepStatus(stepName = stepName, status = 60450)
        return

    def stripInputFiles(self):
        """
        _stripInputFiles_

        If we need to compact the FWJR the easiest way is just to trim
        the number of input files.  Removes every attached input file
        from every input source of every step and resets the counters.
        """
        for stepName in self.data.steps:
            step = self.retrieveStep(stepName)
            inputSources = step.input.listSections_()
            for inputSource in inputSources:
                source = getattr(step.input, inputSource)
                # Delete each file section, then zero the count so the
                # section stays internally consistent.
                for fileNum in range(source.files.fileCount):
                    delattr(source.files, "file%d" % fileNum)
                source.files.fileCount = 0
        return
class Report(object): """ The base class for the new jobReport """ def __init__(self, reportname=None): self.data = ConfigSection("FrameworkJobReport") self.data.steps = [] self.data.workload = "Unknown" self.report = None self.reportname = "" if reportname: self.addStep(reportname=reportname) return def __str__(self): return str(self.data) def listSteps(self): """ _listSteps_ List the names of all the steps in the report. """ return self.data.steps def setStepStatus(self, stepName, status): """ _setStepStatus_ Set the status for a step. """ reportStep = self.retrieveStep(stepName) reportStep.status = status return def parse(self, xmlfile, stepName="cmsRun1"): """ _parse_ Read in the FrameworkJobReport XML file produced by cmsRun and pull the information from it into this object """ from WMCore.FwkJobReport.XMLParser import xmlToJobReport try: xmlToJobReport(self, xmlfile) except Exception as ex: msg = "Error reading XML job report file, possibly corrupt XML File:\n" msg += "Details: %s" % str(ex) crashMessage = "\nStacktrace:\n" stackTrace = traceback.format_tb(sys.exc_info()[2], None) for stackFrame in stackTrace: crashMessage += stackFrame logging.debug(crashMessage) raise FwkJobReportException(msg) @staticmethod def jsonizeFiles(reportModule): """ _jsonizeFiles_ Put individual files in JSON format. """ jsonFiles = [] files = getattr(reportModule, "files", None) if not files: return jsonFiles fileCount = getattr(reportModule.files, "fileCount", 0) for i in range(fileCount): reportFile = getattr(reportModule.files, "file%s" % i) jsonFile = reportFile.dictionary_() if jsonFile.get('runs', None): cfgSectionRuns = jsonFile["runs"] jsonFile["runs"] = {} for runNumber in cfgSectionRuns.listSections_(): jsonFile["runs"][str(runNumber)] = getattr( cfgSectionRuns, runNumber) jsonFiles.append(jsonFile) return jsonFiles @staticmethod def jsonizePerformance(perfSection): """ _jsonizePerformance_ Convert the performance section of the FWJR into JSON. 
""" jsonPerformance = {} for reportSection in ["storage", "memory", "cpu", "multicore"]: jsonPerformance[reportSection] = {} if not hasattr(perfSection, reportSection): continue jsonPerformance[reportSection] = getattr( perfSection, reportSection).dictionary_() for key in jsonPerformance[reportSection]: val = jsonPerformance[reportSection][key] if isinstance(val, float): if math.isinf(val) or math.isnan(val): jsonPerformance[reportSection][key] = None return jsonPerformance def __to_json__(self, thunker): """ __to_json__ Create a JSON version of the Report. """ jsonReport = {} jsonReport["WorkerNodeInfo"] = self.getWorkerNodeInfo() jsonReport["task"] = self.getTaskName() jsonReport["steps"] = {} jsonReport["skippedFiles"] = self.getAllSkippedFiles() jsonReport["fallbackFiles"] = self.getAllFallbackFiles() jsonReport["Campaign"] = self.getCampaign() jsonReport["PrepID"] = self.getPrepID() jsonReport["EOSLogURL"] = self.getLogURL() for stepName in self.listSteps(): reportStep = self.retrieveStep(stepName) jsonStep = {} jsonStep["status"] = reportStep.status stepTimes = self.getTimes(stepName) if stepTimes["startTime"] is not None: stepTimes["startTime"] = int(stepTimes["startTime"]) if stepTimes["stopTime"] is not None: stepTimes["stopTime"] = int(stepTimes["stopTime"]) jsonStep["start"] = stepTimes["startTime"] jsonStep["stop"] = stepTimes["stopTime"] jsonStep["performance"] = self.jsonizePerformance( reportStep.performance) jsonStep["output"] = {} for outputModule in reportStep.outputModules: reportOutputModule = getattr(reportStep.output, outputModule) jsonStep["output"][outputModule] = self.jsonizeFiles( reportOutputModule) analysisSection = getattr(reportStep, 'analysis', None) if analysisSection: jsonStep["output"]['analysis'] = self.jsonizeFiles( analysisSection) jsonStep["input"] = {} for inputSource in reportStep.input.listSections_(): reportInputSource = getattr(reportStep.input, inputSource) jsonStep["input"][inputSource] = self.jsonizeFiles( 
reportInputSource) jsonStep["errors"] = [] errorCount = getattr(reportStep.errors, "errorCount", 0) for i in range(errorCount): reportError = getattr(reportStep.errors, "error%i" % i) jsonStep["errors"].append({ "type": reportError.type, "details": reportError.details, "exitCode": reportError.exitCode }) jsonStep["cleanup"] = {} jsonStep["parameters"] = {} jsonStep["site"] = self.getSiteName() jsonStep["analysis"] = {} jsonStep["logs"] = {} jsonReport["steps"][stepName] = jsonStep return jsonReport def getSiteName(self): """ _getSiteName_ Returns the site name attribute (no step specific) """ return getattr(self.data, 'siteName', {}) def _setSiteName(self, site): """ _setSiteName_ Set the site name attribute (no step specific) """ setattr(self.data, 'siteName', site) return def getExitCodes(self): """ _getExitCodes_ Return a list of all non-zero exit codes in the report """ returnCodes = set() for stepName in self.listSteps(): returnCodes.update(self.getStepExitCodes(stepName=stepName)) return returnCodes def getStepExitCodes(self, stepName): """ _getStepExitCodes_ Returns a list of all non-zero exit codes in the step """ returnCodes = set() reportStep = self.retrieveStep(stepName) errorCount = getattr(reportStep.errors, "errorCount", 0) for i in range(errorCount): reportError = getattr(reportStep.errors, "error%i" % i) if getattr(reportError, 'exitCode', None): returnCodes.add(int(reportError.exitCode)) else: # exitCode is likely set to None(?!?) returnCodes.add(99999) return returnCodes def getExitCode(self): """ _getExitCode_ Return the first exit code you find. """ returnCode = 0 for stepName in self.listSteps(): errorCode = self.getStepExitCode(stepName=stepName) if errorCode == 99999: # Then we don't know what this error was # Mark it for return only if we don't fine an # actual error code in the job. 
returnCode = errorCode elif errorCode != 0: return errorCode return returnCode def getStepExitCode(self, stepName): """ _getStepExitCode_ Get the exit code for a particular step Return 0 if none """ returnCode, _ = self.getStepExitCodeAndMessage(stepName) return returnCode def getStepExitCodeAndMessage(self, stepName): """ _getStepExitCodeAndMessage_ Get the exit code and message for a particular step Return (0, None) if there were no errors. """ returnCode = 0 returnMessage = None reportStep = self.retrieveStep(stepName) errorCount = getattr(reportStep.errors, "errorCount", 0) for i in range(errorCount): reportError = getattr(reportStep.errors, "error%i" % i) if not getattr(reportError, 'exitCode', None): returnCode = 99999 returnMessage = 'Unknown' else: return int(reportError.exitCode), reportError.details return returnCode, returnMessage def persist(self, filename): """ _persist_ Pickle this object and save it to disk. """ if PY3: with open(filename, 'wb') as handle: pickle.dump(encodeUnicodeToBytes(self.data), handle) else: with open(filename, 'w') as handle: pickle.dump(self.data, handle) return def unpersist(self, filename, reportname=None): """ _unpersist_ Load a pickled FWJR from disk. """ if PY3: with open(filename, 'rb') as handle: self.data = decodeBytesToUnicode(pickle.load(handle)) else: with open(filename, 'r') as handle: self.data = pickle.load(handle) # old self.report (if it existed) became unattached if reportname: self.report = getattr(self.data, reportname) return def addOutputModule(self, moduleName): """ _addOutputModule_ Add an entry for an output module. """ self.report.outputModules.append(moduleName) self.report.output.section_(moduleName) outMod = getattr(self.report.output, moduleName) outMod.section_("files") outMod.section_("dataset") outMod.files.fileCount = 0 return outMod def killOutput(self): """ _killOutput_ Remove all the output from the report. 
This is useful for chained processing where we don't want to keep the output from a particular step in a job. """ for outputModuleName in self.report.outputModules: delattr(self.report.output, outputModuleName) self.report.outputModules = [] return def addOutputFile(self, outputModule, aFile=None): """ _addFile_ Add an output file to the outputModule provided. """ logging.info( "addOutputFile method called with outputModule: %s, aFile: %s", outputModule, aFile) aFile = aFile or {} # Now load the output module and create the file object outMod = getattr(self.report.output, outputModule, None) if outMod is None: outMod = self.addOutputModule(outputModule) count = outMod.files.fileCount fileSection = "file%s" % count outMod.files.section_(fileSection) fileRef = getattr(outMod.files, fileSection) logging.info("addOutputFile method fileRef: %s, whole tree: %s", fileRef, fileRef.dictionary_whole_tree_()) outMod.files.fileCount += 1 # Now we need to eliminate the optional and non-primitives: # runs, parents, branches, locations and datasets keyList = list(aFile) fileRef.section_("runs") if "runs" in aFile: for run in aFile["runs"]: addRunInfoToFile(fileRef, run) keyList.remove('runs') if "parents" in aFile: setattr(fileRef, 'parents', list(aFile['parents'])) keyList.remove('parents') if "locations" in aFile: fileRef.location = list(aFile["locations"]) keyList.remove('locations') elif "PNN" in aFile: fileRef.location = [aFile["PNN"]] if "LFN" in aFile: fileRef.lfn = aFile["LFN"] keyList.remove("LFN") if "PFN" in aFile: fileRef.lfn = aFile["PFN"] keyList.remove("PFN") # All right, the rest should be JSONalizable python primitives for entry in keyList: setattr(fileRef, entry, aFile[entry]) # And we're done return fileRef def addInputSource(self, sourceName): """ _addInputSource_ Add an input source to the report doing nothing if the input source already exists. 
""" if hasattr(self.report.input, sourceName): return getattr(self.report.input, sourceName) self.report.input.section_(sourceName) srcMod = getattr(self.report.input, sourceName) srcMod.section_("files") srcMod.files.fileCount = 0 return srcMod def addInputFile(self, sourceName, **attrs): """ _addInputFile_ Add an input file to the given source. """ srcMod = getattr(self.report.input, sourceName, None) if srcMod is None: srcMod = self.addInputSource(sourceName) count = srcMod.files.fileCount fileSection = "file%s" % count srcMod.files.section_(fileSection) fileRef = getattr(srcMod.files, fileSection) srcMod.files.fileCount += 1 keyList = list(attrs) fileRef.section_("runs") if "runs" in attrs: for run in attrs["runs"]: addRunInfoToFile(fileRef, run) keyList.remove('runs') if "parents" in attrs: keyList.remove('parents') if "locations" in attrs: keyList.remove('locations') # All right, the rest should be JSONalizable python primitives for entry in keyList: setattr(fileRef, entry, attrs[entry]) return fileRef def addAnalysisFile(self, filename, **attrs): """ _addAnalysisFile_ Add an Analysis File. """ analysisFiles = self.report.analysis.files count = analysisFiles.fileCount label = "file%s" % count analysisFiles.section_(label) newFile = getattr(analysisFiles, label) newFile.fileName = filename for x, y in viewitems(attrs): setattr(newFile, x, y) analysisFiles.fileCount += 1 return def addRemovedCleanupFile(self, **attrs): """ _addRemovedCleanupFile_ Add a file to the cleanup.removed file """ removedFiles = self.report.cleanup.removed count = self.report.cleanup.removed.fileCount label = 'file%s' % count removedFiles.section_(label) newFile = getattr(removedFiles, label) for x, y in viewitems(attrs): setattr(newFile, x, y) self.report.cleanup.removed.fileCount += 1 return def addError(self, stepName, exitCode, errorType, errorDetails, siteName=None): """ _addError_ Add an error report with an exitCode, type/class of error and details of the error as a string. 
Also, report attempted site if error happened before landing on it. """ if self.retrieveStep(stepName) is None: # Create a step and set it to failed # Assumption: Adding an error fails a step self.addStep(stepName, status=1) if exitCode is not None: exitCode = int(exitCode) setExitCodes = self.getStepExitCodes(stepName) if exitCode in setExitCodes: logging.warning( "Exit code: %s has been already added to the job report", exitCode) return stepSection = self.retrieveStep(stepName) errorCount = getattr(stepSection.errors, "errorCount", 0) errEntry = "error%s" % errorCount stepSection.errors.section_(errEntry) errDetails = getattr(stepSection.errors, errEntry) errDetails.exitCode = exitCode errDetails.type = str(errorType) try: if isinstance(errorDetails, newstr): errDetails.details = errorDetails elif isinstance(errorDetails, bytes): errDetails.details = decodeBytesToUnicode( errorDetails, 'ignore') else: errDetails.details = newstr(errorDetails) except UnicodeEncodeError as ex: msg = "Failed to encode the job error details for job ID: %s." % self.getJobID( ) msg += "\nException message: %s\nOriginal error details: %s" % ( str(ex), errorDetails) logging.error(msg) msg = "DEFAULT ERROR MESSAGE, because it failed to UTF-8 encode the original message." errDetails.details = msg except UnicodeDecodeError as ex: msg = "Failed to decode the job error details for job ID: %s." % self.getJobID( ) msg += "\nException message: %s\nOriginal error details: %s" % ( str(ex), errorDetails) logging.error(msg) msg = "DEFAULT ERROR MESSAGE, because it failed to UTF-8 decode the original message." 
errDetails.details = msg setattr(stepSection.errors, "errorCount", errorCount + 1) self.setStepStatus(stepName=stepName, status=exitCode) if siteName: self._setSiteName(site=siteName) return def addSkippedFile(self, lfn, pfn): """ _addSkippedFile_ Report a skipped input file """ count = self.report.skipped.files.fileCount entry = "file%s" % count self.report.skipped.files.section_(entry) skipSect = getattr(self.report.skipped.files, entry) skipSect.PhysicalFileName = pfn skipSect.LogicalFileName = lfn self.report.skipped.files.fileCount += 1 return def addFallbackFile(self, lfn, pfn): """ _addFallbackFile_ Report a fallback attempt for input file """ count = self.report.fallback.files.fileCount entry = "file%s" % count self.report.fallback.files.section_(entry) fallbackSect = getattr(self.report.fallback.files, entry) fallbackSect.PhysicalFileName = pfn fallbackSect.LogicalFileName = lfn self.report.fallback.files.fileCount += 1 return def addSkippedEvent(self, run, event): """ _addSkippedEvent_ Add a skipped event. 
""" self.report.skipped.events.section_(str(run)) runsect = getattr(self.report.skipped.events, str(run)) if not hasattr(runsect, "eventList"): runsect.eventList = [] runsect.eventList.append(event) return def addStep(self, reportname, status=1): """ _addStep_ This creates a report section into self.report """ if hasattr(self.data, reportname): msg = "Attempted to create pre-existing report section %s" % reportname logging.error(msg) return self.data.steps.append(reportname) self.reportname = reportname self.data.section_(reportname) self.report = getattr(self.data, reportname) self.report.id = None self.report.status = status self.report.outputModules = [] # structure self.report.section_("site") self.report.section_("output") self.report.section_("input") self.report.section_("performance") self.report.section_("analysis") self.report.section_("errors") self.report.section_("skipped") self.report.section_("fallback") self.report.section_("parameters") self.report.section_("logs") self.report.section_("cleanup") self.report.analysis.section_("files") self.report.cleanup.section_("removed") self.report.cleanup.section_("unremoved") self.report.skipped.section_("events") self.report.skipped.section_("files") self.report.fallback.section_("files") self.report.skipped.files.fileCount = 0 self.report.fallback.files.fileCount = 0 self.report.analysis.files.fileCount = 0 self.report.cleanup.removed.fileCount = 0 return def setStep(self, stepName, stepSection): """ _setStep_ """ if stepName not in self.listSteps(): self.data.steps.append(stepName) else: logging.info( "Step %s is now being overridden by a new step report", stepName) self.data.section_(stepName) setattr(self.data, stepName, stepSection) return def retrieveStep(self, step): """ _retrieveStep_ Grabs a report in the raw and returns it. 
""" reportSection = getattr(self.data, step, None) return reportSection def load(self, filename): """ _load_ This just maps to unpersist """ self.unpersist(filename) return def save(self, filename): """ _save_ This just maps to persist """ self.persist(filename) return def getOutputModule(self, step, outputModule): """ _getOutputModule_ Get the output module from a particular step """ stepReport = self.retrieveStep(step=step) if not stepReport: return None return getattr(stepReport.output, outputModule, None) def getOutputFile(self, fileName, outputModule, step): """ _getOutputFile_ Takes a fileRef object and returns a DataStructs/File object as output """ outputMod = self.getOutputModule(step=step, outputModule=outputModule) if not outputMod: return None fileRef = getattr(outputMod.files, fileName, None) newFile = File(locations=set()) # Locations newFile.setLocation(getattr(fileRef, "location", None)) # Runs runList = fileRef.runs.listSections_() for run in runList: lumis = getattr(fileRef.runs, run) if isinstance(lumis, dict): newRun = Run(int(run), *listitems(lumis)) else: newRun = Run(int(run), *lumis) newFile.addRun(newRun) newFile["lfn"] = getattr(fileRef, "lfn", None) newFile["pfn"] = getattr(fileRef, "pfn", None) newFile["events"] = int(getattr(fileRef, "events", 0)) newFile["size"] = int(getattr(fileRef, "size", 0)) newFile["branches"] = getattr(fileRef, "branches", []) newFile["input"] = getattr(fileRef, "input", []) newFile["inputpfns"] = getattr(fileRef, "inputpfns", []) newFile["branch_hash"] = getattr(fileRef, "branch_hash", None) newFile["catalog"] = getattr(fileRef, "catalog", "") newFile["guid"] = getattr(fileRef, "guid", "") newFile["module_label"] = getattr(fileRef, "module_label", "") newFile["checksums"] = getattr(fileRef, "checksums", {}) newFile["merged"] = bool(getattr(fileRef, "merged", False)) newFile["dataset"] = getattr(fileRef, "dataset", {}) newFile["acquisitionEra"] = getattr(fileRef, 'acquisitionEra', None) newFile["processingVer"] 
= getattr(fileRef, 'processingVer', None) newFile["validStatus"] = getattr(fileRef, 'validStatus', None) newFile["globalTag"] = getattr(fileRef, 'globalTag', None) newFile["prep_id"] = getattr(fileRef, 'prep_id', None) newFile['configURL'] = getattr(fileRef, 'configURL', None) newFile['inputPath'] = getattr(fileRef, 'inputPath', None) newFile["outputModule"] = outputModule newFile["fileRef"] = fileRef return newFile def getAllFilesFromStep(self, step): """ _getAllFilesFromStep_ For a given step, retrieve all the associated files """ stepReport = self.retrieveStep(step=step) if not stepReport: logging.debug("Asked to retrieve files from non-existant step %s", step) return [] # steps with no outputModules can be ok (even for CMSSW steps) listOfModules = getattr(stepReport, 'outputModules', None) if not listOfModules: return [] listOfFiles = [] for module in listOfModules: listOfFiles.extend( self.getFilesFromOutputModule(step=step, outputModule=module)) return listOfFiles def getAllFiles(self): """ _getAllFiles_ Grabs all files in all output modules in all steps """ listOfFiles = [] for step in self.listSteps(): listOfFiles.extend(self.getAllFilesFromStep(step=step)) return listOfFiles def getAllInputFiles(self): """ _getAllInputFiles_ Gets all the input files """ listOfFiles = [] for step in self.listSteps(): tmp = self.getInputFilesFromStep(stepName=step) if tmp: listOfFiles.extend(tmp) return listOfFiles def getInputFilesFromStep(self, stepName, inputSource=None): """ _getInputFilesFromStep_ Retrieve a list of input files from the given step. 
""" step = self.retrieveStep(stepName) if inputSource is None: inputSources = step.input.listSections_() else: inputSources = [inputSource] inputFiles = [] for inputSource in inputSources: source = getattr(step.input, inputSource) for fileNum in range(source.files.fileCount): fwjrFile = getattr(source.files, "file%d" % fileNum) lfn = getattr(fwjrFile, "lfn", None) pfn = getattr(fwjrFile, "pfn", None) size = getattr(fwjrFile, "size", 0) events = getattr(fwjrFile, "events", 0) branches = getattr(fwjrFile, "branches", []) catalog = getattr(fwjrFile, "catalog", None) guid = getattr(fwjrFile, "guid", None) inputSourceClass = getattr(fwjrFile, "input_source_class", None) moduleLabel = getattr(fwjrFile, "module_label", None) inputType = getattr(fwjrFile, "input_type", None) inputFile = File(lfn=lfn, size=size, events=events) inputFile["pfn"] = pfn inputFile["branches"] = branches inputFile["catalog"] = catalog inputFile["guid"] = guid inputFile["input_source_class"] = inputSourceClass inputFile["module_label"] = moduleLabel inputFile["input_type"] = inputType runSection = getattr(fwjrFile, "runs") runNumbers = runSection.listSections_() for runNumber in runNumbers: lumiTuple = getattr(runSection, str(runNumber)) inputFile.addRun(Run(int(runNumber), *lumiTuple)) inputFiles.append(inputFile) return inputFiles def getFilesFromOutputModule(self, step, outputModule): """ _getFilesFromOutputModule_ Grab all the files in a particular output module """ outputMod = self.getOutputModule(step=step, outputModule=outputModule) if not outputMod: return [] listOfFiles = [] for n in range(outputMod.files.fileCount): aFile = self.getOutputFile(fileName='file%i' % (n), outputModule=outputModule, step=step) if aFile: listOfFiles.append(aFile) else: msg = "Could not find file%i in module" % (n) logging.error(msg) return [] return listOfFiles def getAllSkippedFiles(self): """ _getAllSkippedFiles_ Get a list of LFNs for all the input files listed as skipped on the report. 
""" listOfFiles = [] for step in self.listSteps(): tmp = self.getSkippedFilesFromStep(stepName=step) if tmp: listOfFiles.extend(tmp) return listOfFiles def getAllFallbackFiles(self): """ _getAllFallbackFiles_ Get a list of LFNs for all the input files listed as fallback attempt on the report """ listOfFiles = [] for step in self.listSteps(): tmp = self.getFallbackFilesFromStep(stepName=step) if tmp: listOfFiles.extend(tmp) return listOfFiles def getSkippedFilesFromStep(self, stepName): """ _getSkippedFilesFromStep_ Get a list of LFNs skipped in the given step """ skippedFiles = [] step = self.retrieveStep(stepName) filesSection = step.skipped.files fileCount = getattr(filesSection, "fileCount", 0) for fileNum in range(fileCount): fileSection = getattr(filesSection, "file%d" % fileNum) lfn = getattr(fileSection, "LogicalFileName", None) if lfn is not None: skippedFiles.append(lfn) else: logging.error("Found no LFN in file %s", str(fileSection)) return skippedFiles def getFallbackFilesFromStep(self, stepName): """ _getFallbackFilesFromStep_ Get a list of LFNs which triggered a fallback in the given step """ fallbackFiles = [] step = self.retrieveStep(stepName) try: filesSection = step.fallback.files except AttributeError: return fallbackFiles fileCount = getattr(filesSection, "fileCount", 0) for fileNum in range(fileCount): fileSection = getattr(filesSection, "file%d" % fileNum) lfn = getattr(fileSection, "LogicalFileName", None) if lfn is not None: fallbackFiles.append(lfn) else: logging.error("Found no LFN in file %s", str(fileSection)) return fallbackFiles def getStepErrors(self, stepName): """ _getStepErrors_ Get all errors for a given step """ if self.retrieveStep(stepName) is None: # Create a step and set it to failed # Assumption: Adding an error fails a step self.addStep(stepName, status=1) stepSection = self.retrieveStep(stepName) errorCount = getattr(stepSection.errors, "errorCount", 0) if errorCount == 0: return {} else: return 
# NOTE(review): the methods below belong to the FwkJobReport `Report` class,
# whose `class` header lies outside this chunk.  The first statement is the
# tail of a method truncated before this chunk; kept verbatim.
stepSection.errors.dictionary_()

def stepSuccessful(self, stepName):
    """
    _stepSuccessful_

    Determine whether or not a step was successful.
    """
    stepReport = self.retrieveStep(step=stepName)
    # Missing status defaults to 1 (failure), so an absent step reads as failed.
    status = getattr(stepReport, 'status', 1)
    # We have too many possibilities
    if status not in [0, '0', 'success', 'Success']:
        return False
    return True

def taskSuccessful(self, ignoreString='logArch'):
    """
    _taskSuccessful_

    Return True if all steps successful, False otherwise
    """
    value = True

    if len(self.listSteps()) == 0:
        # Mark jobs as failed if they have no steps
        msg = "Could not find any steps"
        logging.error(msg)
        return False

    for stepName in self.listSteps():
        # Ignore specified steps
        # i.e., logArch steps can fail without causing
        # the task to fail
        if ignoreString and re.search(ignoreString, stepName):
            continue
        if not self.stepSuccessful(stepName=stepName):
            value = False

    return value

def getAnalysisFilesFromStep(self, step):
    """
    _getAnalysisFilesFromStep_

    Retrieve a list of all the analysis files produced in a step.
    """
    stepReport = self.retrieveStep(step=step)

    if not stepReport or not hasattr(stepReport.analysis, 'files'):
        return []

    analysisFiles = stepReport.analysis.files
    results = []
    # Files are stored as attributes file0, file1, ... with a fileCount counter.
    for fileNum in range(analysisFiles.fileCount):
        results.append(getattr(analysisFiles, "file%s" % fileNum))

    # filter out duplicates, keyed on (fileName, inputtag)
    duplicateCheck = []
    filteredResults = []
    for result in results:
        inputtag = getattr(result, 'inputtag', None)
        if (result.fileName, inputtag) not in duplicateCheck:
            duplicateCheck.append((result.fileName, inputtag))
            filteredResults.append(result)

    return filteredResults

def getAllFileRefsFromStep(self, step):
    """
    _getAllFileRefsFromStep_

    Retrieve a list of all files produced in a step.  The files will
    be in the form of references to the ConfigSection objects in the
    actual report.
    """
    stepReport = self.retrieveStep(step=step)
    if not stepReport:
        return []

    outputModules = getattr(stepReport, "outputModules", [])
    fileRefs = []
    for outputModule in outputModules:
        outputModuleRef = self.getOutputModule(step=step, outputModule=outputModule)
        for i in range(outputModuleRef.files.fileCount):
            fileRefs.append(getattr(outputModuleRef.files, "file%i" % i))

    # Analysis files are returned alongside the output-module files.
    analysisFiles = self.getAnalysisFilesFromStep(step)
    fileRefs.extend(analysisFiles)

    return fileRefs

def addInfoToOutputFilesForStep(self, stepName, step):
    """
    _addInfoToOutputFilesForStep_

    Add the information missing from output files to the files.
    This requires the WMStep to be passed in.
    """
    stepReport = self.retrieveStep(step=stepName)
    fileInfo = FileInfo()

    if not stepReport:
        return

    # NOTE(review): default is None, not [] — assumes outputModules is always
    # present when stepReport exists; confirm against report construction.
    listOfModules = getattr(stepReport, 'outputModules', None)
    for module in listOfModules:
        outputMod = getattr(stepReport.output, module, None)
        for n in range(outputMod.files.fileCount):
            aFile = getattr(outputMod.files, 'file%i' % n, None)
            if not aFile:
                msg = "Could not find file%i in module" % n
                logging.error(msg)
                return
            # FileInfo is callable: it decorates the file section in place.
            fileInfo(fileReport=aFile, step=step, outputModule=module)
    return

def deleteOutputModuleForStep(self, stepName, moduleName):
    """
    _deleteOutputModuleForStep_

    Delete any reference to the given output module in the step report,
    that includes deleting any output file it produced.
    """
    stepReport = self.retrieveStep(step=stepName)

    if not stepReport:
        return

    listOfModules = getattr(stepReport, 'outputModules', [])
    if moduleName not in listOfModules:
        return

    # Removing the section drops all of the module's file entries with it.
    delattr(stepReport.output, moduleName)
    listOfModules.remove(moduleName)
    return

def setStepStartTime(self, stepName):
    """
    _setStepStartTime_

    Set the startTime for a step.
    """
    reportStep = self.retrieveStep(stepName)
    reportStep.startTime = time.time()
    return

def setStepStopTime(self, stepName):
    """
    _setStepStopTime_

    Set the stopTime for a step.
    """
    reportStep = self.retrieveStep(stepName)
    reportStep.stopTime = time.time()
    return

def getTimes(self, stepName):
    """
    _getTimes_

    Return a dictionary with the start and stop times
    """
    reportStep = self.retrieveStep(stepName)
    startTime = getattr(reportStep, 'startTime', None)
    stopTime = getattr(reportStep, 'stopTime', None)
    return {'startTime': startTime, 'stopTime': stopTime}

def getFirstStartLastStop(self):
    """
    _getFirstStartLastStop_

    Get the first startTime, last stopTime
    """
    steps = self.listSteps()

    if len(steps) < 1:
        return None

    firstStep = self.getTimes(stepName=steps[0])
    startTime = firstStep['startTime']
    stopTime = firstStep['stopTime']

    for stepName in steps:
        timeStamps = self.getTimes(stepName=stepName)
        if timeStamps['startTime'] is None or timeStamps['stopTime'] is None:
            # Unusable times
            continue
        # Track the minimum startTime and maximum stopTime over all steps.
        if startTime is None or startTime > timeStamps['startTime']:
            startTime = timeStamps['startTime']
        if stopTime is None or stopTime < timeStamps['stopTime']:
            stopTime = timeStamps['stopTime']

    return {'startTime': startTime, 'stopTime': stopTime}

def setTaskName(self, taskName):
    """
    _setTaskName_

    Set the task name for the report
    """
    self.data.task = taskName
    return

def getTaskName(self):
    """
    _getTaskName_

    Return the task name
    """
    return getattr(self.data, 'task', None)

def setJobID(self, jobID):
    """
    _setJobID_

    Set the WMBS jobID
    """
    self.data.jobID = jobID
    return

def getJobID(self):
    """
    _getJobID_

    Get the WMBS job ID if attached
    """
    return getattr(self.data, 'jobID', None)

def getAllFileRefs(self):
    """
    _getAllFileRefs_

    Get references for all file in the step
    """
    fileRefs = []
    for step in self.listSteps():
        tmpRefs = self.getAllFileRefsFromStep(step=step)
        if len(tmpRefs) > 0:
            fileRefs.extend(tmpRefs)

    return fileRefs

def setAcquisitionProcessing(self, acquisitionEra, processingVer, processingStr=None):
    """
    _setAcquisitionProcessing_

    Set the acquisition and processing era for every output file.
    ONLY run this after all files have been accumulated; it doesn't
    set things for future files.
    """
    fileRefs = self.getAllFileRefs()

    # Should now have all the fileRefs
    for f in fileRefs:
        f.acquisitionEra = acquisitionEra
        f.processingVer = processingVer
        f.processingStr = processingStr

    return

def setValidStatus(self, validStatus):
    """
    _setValidStatus_

    Set the validStatus for all steps and all files.
    ONLY run this after all files have been attached.
    """
    fileRefs = self.getAllFileRefs()

    # Should now have all the fileRefs
    for f in fileRefs:
        f.validStatus = validStatus

    return

def setGlobalTag(self, globalTag):
    """
    _setGlobalTag_

    Set the global Tag from the spec on the WN.
    ONLY run this after all the files have been attached.
    """
    fileRefs = self.getAllFileRefs()

    # Should now have all the fileRefs
    for f in fileRefs:
        f.globalTag = globalTag

    return

def setCampaign(self, campaign):
    """
    _setCampaign_

    Set the campaign for the report
    """
    self.data.campaign = campaign
    return

def getCampaign(self):
    """
    _getCampaign_

    Return the campaign
    """
    return getattr(self.data, 'campaign', "")

def setPrepID(self, prep_id):
    """
    _setPrepID_

    Set the PrepID on every attached file and on the report itself.
    ONLY run this after all the files have been attached.
    """
    fileRefs = self.getAllFileRefs()

    # Should now have all the fileRefs
    for f in fileRefs:
        f.prep_id = prep_id
    self.data.prep_id = prep_id
    return

def getPrepID(self):
    """
    _getPrepID_

    Return the PrepID
    """
    return getattr(self.data, 'prep_id', "")

def setConfigURL(self, configURL):
    """
    _setConfigURL_

    Set the config URL in a portable storage form
    """
    fileRefs = self.getAllFileRefs()

    # Should now have all the fileRefs
    for f in fileRefs:
        f.configURL = configURL

    return

def setInputDataset(self, inputPath):
    """
    _setInputDataset_

    Set the input dataset path for the task in each file
    """
    fileRefs = self.getAllFileRefs()

    # Should now have all the fileRefs
    for f in fileRefs:
        f.inputPath = inputPath

    return

def setStepRSS(self, stepName, minimum, maximum, average):
    """
    _setStepRSS_

    Set the Performance RSS information
    """
    reportStep = self.retrieveStep(stepName)
    reportStep.performance.section_('RSSMemory')
    reportStep.performance.RSSMemory.min = minimum
    reportStep.performance.RSSMemory.max = maximum
    reportStep.performance.RSSMemory.average = average
    return

def setStepPMEM(self, stepName, minimum, maximum, average):
    """
    _setStepPMEM_

    Set the Performance PMEM information
    """
    reportStep = self.retrieveStep(stepName)
    reportStep.performance.section_('PhysicalMemory')
    reportStep.performance.PhysicalMemory.min = minimum
    reportStep.performance.PhysicalMemory.max = maximum
    reportStep.performance.PhysicalMemory.average = average
    return

def setStepPCPU(self, stepName, minimum, maximum, average):
    """
    _setStepPCPU_

    Set the Performance PCPU information
    """
    reportStep = self.retrieveStep(stepName)
    reportStep.performance.section_('PercentCPU')
    reportStep.performance.PercentCPU.min = minimum
    reportStep.performance.PercentCPU.max = maximum
    reportStep.performance.PercentCPU.average = average
    return

def setStepVSize(self, stepName, minimum, maximum, average):
    """
    _setStepVSize_

    Set the Performance VSize information
    """
    reportStep = self.retrieveStep(stepName)
    reportStep.performance.section_('VSizeMemory')
    reportStep.performance.VSizeMemory.min = minimum
    reportStep.performance.VSizeMemory.max = maximum
    reportStep.performance.VSizeMemory.average = average
    return

def setStepCounter(self, stepName, counter):
    """
    _setStepCounter_

    Assign a number to the step
    """
    reportStep = self.retrieveStep(stepName)
    reportStep.counter = counter
    return

def checkForAdlerChecksum(self, stepName):
    """
    _checkForAdlerChecksum_

    Some steps require that all output files have adler checksums.
    This will go through all output files in a step and make sure
    they have an adler32 checksum.  If they don't it creates an error
    with code 60451 for the step, failing it.
    """
    error = None
    files = self.getAllFilesFromStep(step=stepName)
    for f in files:
        if 'adler32' not in f.get('checksums', {}):
            error = f.get('lfn', None)
        elif f['checksums']['adler32'] is None:
            error = f.get('lfn', None)

    # Only the last offending file's LFN is reported.
    if error:
        msg = '%s, file was %s' % (WM_JOB_ERROR_CODES[60451], error)
        self.addError(stepName, 60451, "NoAdler32Checksum", msg)
        self.setStepStatus(stepName=stepName, status=60451)

    return

def checkForRunLumiInformation(self, stepName):
    """
    _checkForRunLumiInformation_

    Some steps require that all output files have run lumi information.
    This will go through all output files in a step and make sure
    they have run/lumi information.  If they don't it creates an error
    with code 70452 for the step, failing it.
    """
    error = None
    files = self.getAllFilesFromStep(step=stepName)
    for f in files:
        if not f.get('runs', None):
            error = f.get('lfn', None)
        else:
            for run in f['runs']:
                lumis = run.lumis
                if not lumis:
                    error = f.get('lfn', None)
                    break

    if error:
        msg = '%s, file was %s' % (WM_JOB_ERROR_CODES[70452], error)
        self.addError(stepName, 70452, "NoRunLumiInformation", msg)
        self.setStepStatus(stepName=stepName, status=70452)

    return

def checkForOutputFiles(self, stepName):
    """
    _checkForOutputFiles_

    Verify that there is at least an output file, either from
    analysis or from an output module.
    """
    files = self.getAllFilesFromStep(step=stepName)
    analysisFiles = self.getAnalysisFilesFromStep(step=stepName)
    if len(files) == 0 and len(analysisFiles) == 0:
        msg = WM_JOB_ERROR_CODES[60450]
        msg += "\nList of skipped files is:\n"
        for skipF in self.getSkippedFilesFromStep(stepName=stepName):
            msg += " %s\n" % skipF
        self.addError(stepName, 60450, "NoOutput", msg)
        self.setStepStatus(stepName=stepName, status=60450)
    return

def stripInputFiles(self):
    """
    _stripInputFiles_

    If we need to compact the FWJR the easiest way is just to
    trim the number of input files.
    """
    for stepName in self.listSteps():
        step = self.retrieveStep(stepName)
        inputSources = step.input.listSections_()
        for inputSource in inputSources:
            source = getattr(step.input, inputSource)
            # Delete every file attribute and reset the counter.
            for fileNum in range(source.files.fileCount):
                delattr(source.files, "file%d" % fileNum)
            source.files.fileCount = 0
    return

def getWorkerNodeInfo(self):
    # Snapshot of worker-node info recorded on the report's top-level section.
    wnInfo = {
        "HostName": getattr(self.data, 'hostName', ''),
        "MachineFeatures": getattr(self.data, 'machineFeatures', {}),
        "JobFeatures": getattr(self.data, 'jobFeatures', {})
    }
    return wnInfo

def setLogURL(self, url):
    """
    Set log url for the this job report.
    https://eoscmsweb.cern.ch/eos/cms/store/logs/prod/recent/
    """
    self.data.logURL = url

def getLogURL(self):
    """
    _getLogURL_

    Return the log URL
    """
    return getattr(self.data, 'logURL', '')
""" __MergeSample__ Example of a report from a merge job Created on Fri Jun 8 13:22:30 2012 @author: dballest """ from WMCore.Configuration import ConfigSection from WMCore.FwkJobReport.Report import Report FrameworkJobReport = ConfigSection("FrameworkJobReport") FrameworkJobReport.task = '/Run195376-MuEG-Run2012B-PromptReco-v1-MuEG/DataProcessing/DataProcessingMergeSKIMStreamLogError' FrameworkJobReport.workload = 'Unknown' FrameworkJobReport.section_('cmsRun1') FrameworkJobReport.cmsRun1.status = 0 FrameworkJobReport.cmsRun1.counter = 1 FrameworkJobReport.cmsRun1.section_('cleanup') FrameworkJobReport.cmsRun1.cleanup.section_('unremoved') FrameworkJobReport.cmsRun1.cleanup.section_('removed') FrameworkJobReport.cmsRun1.cleanup.removed.fileCount = 0 FrameworkJobReport.cmsRun1.section_('errors') FrameworkJobReport.cmsRun1.section_('logs') FrameworkJobReport.cmsRun1.section_('parameters') FrameworkJobReport.cmsRun1.parameters.GeneratorInfo = '' FrameworkJobReport.cmsRun1.parameters.ReadBranches = '' FrameworkJobReport.cmsRun1.outputModules = ['Merged'] FrameworkJobReport.cmsRun1.stopTime = 1338808530.44 FrameworkJobReport.cmsRun1.section_('site') FrameworkJobReport.cmsRun1.section_('analysis')
""" __ErrorSample__ Example of a report from a job that failed Created on Fri Jun 8 13:22:11 2012 @author: dballest """ from WMCore.Configuration import ConfigSection from WMCore.FwkJobReport.Report import Report FrameworkJobReport = ConfigSection("FrameworkJobReport") FrameworkJobReport.task = '/Run195530-PhotonHad-Run2012B-PromptReco-v1-PhotonHad/DataProcessing' FrameworkJobReport.workload = 'Unknown' FrameworkJobReport.section_('cmsRun1') FrameworkJobReport.cmsRun1.status = 1 FrameworkJobReport.cmsRun1.counter = 1 FrameworkJobReport.cmsRun1.section_('errors') FrameworkJobReport.cmsRun1.errors.section_('error0') FrameworkJobReport.cmsRun1.errors.error0.type = 'CMSSWStepFailure' FrameworkJobReport.cmsRun1.errors.error0.details = '' FrameworkJobReport.cmsRun1.errors.error0.exitCode = 84 FrameworkJobReport.cmsRun1.errors.section_('error1') FrameworkJobReport.cmsRun1.errors.error1.type = 'Fatal Exception' FrameworkJobReport.cmsRun1.errors.error1.details = '' FrameworkJobReport.cmsRun1.errors.error1.exitCode = '8020' FrameworkJobReport.cmsRun1.errors.section_('error2') FrameworkJobReport.cmsRun1.errors.error2.type = 'ErrorLoggingAddition' FrameworkJobReport.cmsRun1.errors.error2.details = '' FrameworkJobReport.cmsRun1.errors.errorCount = 3
Example of a report from a merge job Created on Fri Jun 8 13:22:30 2012 @author: dballest """
# NOTE(review): the line above is the tail of a module docstring whose opening
# quotes lie outside this chunk; kept verbatim.  This is a reformatted variant
# of the __MergeSample__ script.
from WMCore.Configuration import ConfigSection
from WMCore.FwkJobReport.Report import Report

# Top-level report section: task/workload identification.
FrameworkJobReport = ConfigSection("FrameworkJobReport")
FrameworkJobReport.task = (
    "/Run195376-MuEG-Run2012B-PromptReco-v1-MuEG/DataProcessing/DataProcessingMergeSKIMStreamLogError"
)
FrameworkJobReport.workload = "Unknown"
# cmsRun1 step: successful (status 0) merge step.
FrameworkJobReport.section_("cmsRun1")
FrameworkJobReport.cmsRun1.status = 0
FrameworkJobReport.cmsRun1.counter = 1
# Cleanup bookkeeping: nothing removed.
FrameworkJobReport.cmsRun1.section_("cleanup")
FrameworkJobReport.cmsRun1.cleanup.section_("unremoved")
FrameworkJobReport.cmsRun1.cleanup.section_("removed")
FrameworkJobReport.cmsRun1.cleanup.removed.fileCount = 0
# Empty error/log sections and framework parameters.
FrameworkJobReport.cmsRun1.section_("errors")
FrameworkJobReport.cmsRun1.section_("logs")
FrameworkJobReport.cmsRun1.section_("parameters")
FrameworkJobReport.cmsRun1.parameters.GeneratorInfo = ""
FrameworkJobReport.cmsRun1.parameters.ReadBranches = ""
# Output and timing.
FrameworkJobReport.cmsRun1.outputModules = ["Merged"]
FrameworkJobReport.cmsRun1.stopTime = 1338808530.44
FrameworkJobReport.cmsRun1.section_("site")
FrameworkJobReport.cmsRun1.section_("analysis")
class Aggregator(object):
    """
    _Aggregator_

    Util to aggregate performance reports for multicore jobs into
    a single performance report, including a multicore section to
    allow profiling of internal performance.
    """

    def __init__(self):
        # Number of per-core reports added so far.
        self.numCores = 0
        # Maps section name -> list of parameter names expected in it.
        self.sections = {}
        # Maps "section.param" -> list of values collected across cores.
        self.values = {}
        self.report = ConfigSection("performance")
        #
        # populate the aggregator with the list of expected keys
        # based on the functions map above
        # create a combined performance report with the appropriate sections
        for red in AggrFunctions:
            self.values[red] = []
            sect, param = red.split(".")
            # FIX: dict.has_key() was removed in Python 3; use the `in` operator.
            if sect not in self.sections:
                self.sections[sect] = []
                self.report.section_(sect)
            if param not in self.sections[sect]:
                self.sections[sect].append(param)

    def add(self, perfRep):
        """
        _add_

        Add the contents of the given performance rep to this aggregator.
        """
        self.numCores += 1
        for sect, params in self.sections.items():
            for param in params:
                key = ".".join([sect, param])
                try:
                    # protect against weird cases like NaNs in the reports
                    value = float(getSectParam(perfRep, sect, param))
                except AttributeError:
                    # protect against missing parameters
                    continue
                except ValueError:
                    continue
                self.values[key].append(value)

    def aggregate(self):
        """
        _aggregate_

        For each key in the map, run the appropriate aggregation function
        on it.  Returns the combined performance report.
        """
        for key, vals in self.values.items():
            # avoid divide by zero averages etc
            if len(vals) == 0:
                continue
            aggFunc = AggrFunctions[key]
            sect, param = key.split(".")
            section = getattr(self.report, sect)
            setattr(section, param, aggFunc(vals))
        self.createMulticoreSection()
        return self.report

    def createMulticoreSection(self):
        """
        _createMulticoreSection_

        Create the multicore report section.
        """
        self.report.section_("multicore")
        self.report.multicore.coresUsed = self.numCores
        # FIX: dict.has_key() was removed in Python 3; use the `in` operator.
        if "cpu.TotalJobTime" in self.values:
            vals = self.values["cpu.TotalJobTime"]
            self.report.multicore.summedProcessTime = sum(vals)
            self.report.multicore.averageProcessTime = average(vals)
            self.report.multicore.maxProcessTime = max(vals)
            self.report.multicore.minProcessTime = min(vals)
            self.report.multicore.processWaitingTime = max(vals) - min(vals)
            stepEffNom = float(sum(vals)) / float((max(vals) * self.numCores))
            stepEffDenom = float(average(vals)) / float(max(vals))
            stepEff = stepEffNom / stepEffDenom
            self.report.multicore.stepEfficiency = stepEff
        # frame in the merge report values
        # need to be set from the MulticoreCMSSW Executor
        self.report.multicore.mergeStartTime = None
        self.report.multicore.mergeEndTime = None
        self.report.multicore.numberOfMerges = None
        self.report.multicore.totalStepTime = None
        self.report.multicore.averageMergeTime = None
        self.report.multicore.maxMergeTime = None
        self.report.multicore.minMergeTime = None
class Aggregator(object):
    """
    _Aggregator_

    Util to aggregate performance reports for multicore jobs into
    a single performance report, including a multicore section to
    allow profiling of internal performance.
    """

    def __init__(self):
        # Number of per-core reports added so far.
        self.numCores = 0
        # Maps section name -> list of parameter names expected in it.
        self.sections = {}
        # Maps "section.param" -> list of values collected across cores.
        self.values = {}
        self.report = ConfigSection("performance")
        #
        # populate the aggregator with the list of expected keys
        # based on the functions map above
        # create a combined performance report with the appropriate sections
        for red in AggrFunctions:
            self.values[red] = []
            sect, param = red.split(".")
            # FIX: dict.has_key() was removed in Python 3; use the `in` operator.
            if sect not in self.sections:
                self.sections[sect] = []
                self.report.section_(sect)
            if param not in self.sections[sect]:
                self.sections[sect].append(param)

    def add(self, perfRep):
        """
        _add_

        Add the contents of the given performance rep to this aggregator.
        """
        self.numCores += 1
        for sect, params in self.sections.items():
            for param in params:
                key = ".".join([sect, param])
                try:
                    # protect against weird cases like NaNs in the reports
                    value = float(getSectParam(perfRep, sect, param))
                except AttributeError:
                    # protect against missing parameters
                    continue
                except ValueError:
                    continue
                self.values[key].append(value)

    def aggregate(self):
        """
        _aggregate_

        For each key in the map, run the appropriate aggregation function
        on it.  Returns the combined performance report.
        """
        for key, vals in self.values.items():
            # avoid divide by zero averages etc
            if len(vals) == 0:
                continue
            aggFunc = AggrFunctions[key]
            sect, param = key.split(".")
            section = getattr(self.report, sect)
            setattr(section, param, aggFunc(vals))
        self.createMulticoreSection()
        return self.report

    def createMulticoreSection(self):
        """
        _createMulticoreSection_

        Create the multicore report section.
        """
        self.report.section_("multicore")
        self.report.multicore.coresUsed = self.numCores
        # FIX: dict.has_key() was removed in Python 3; use the `in` operator.
        if "cpu.TotalJobTime" in self.values:
            vals = self.values["cpu.TotalJobTime"]
            self.report.multicore.summedProcessTime = sum(vals)
            self.report.multicore.averageProcessTime = average(vals)
            self.report.multicore.maxProcessTime = max(vals)
            self.report.multicore.minProcessTime = min(vals)
            self.report.multicore.processWaitingTime = max(vals) - min(vals)
            stepEffNom = float(sum(vals)) / float((max(vals) * self.numCores))
            stepEffDenom = float(average(vals)) / float(max(vals))
            stepEff = stepEffNom / stepEffDenom
            self.report.multicore.stepEfficiency = stepEff
        # frame in the merge report values
        # need to be set from the MulticoreCMSSW Executor
        self.report.multicore.mergeStartTime = None
        self.report.multicore.mergeEndTime = None
        self.report.multicore.numberOfMerges = None
        self.report.multicore.totalStepTime = None
        self.report.multicore.averageMergeTime = None
        self.report.multicore.maxMergeTime = None
        self.report.multicore.minMergeTime = None
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__FallbackSample__

Example of a report from a job that had xrootd fallback reads
"""
from WMCore.Configuration import ConfigSection
from WMCore.FwkJobReport.Report import Report

# Top-level report section: task/workload identification.
FrameworkJobReport = ConfigSection("FrameworkJobReport")
FrameworkJobReport.task = "/Run195530-PhotonHad-Run2012B-PromptReco-v1-PhotonHad/DataProcessing"
FrameworkJobReport.workload = "Unknown"

# cmsRun1 step: marked failed (non-zero status) despite fallback reads.
FrameworkJobReport.section_("cmsRun1")
FrameworkJobReport.cmsRun1.status = 1

# Cleanup bookkeeping: nothing removed.
FrameworkJobReport.cmsRun1.section_("cleanup")
FrameworkJobReport.cmsRun1.cleanup.section_("unremoved")
FrameworkJobReport.cmsRun1.cleanup.section_("removed")
FrameworkJobReport.cmsRun1.cleanup.removed.fileCount = 0

# Empty error/log sections and framework parameters.
FrameworkJobReport.cmsRun1.section_("errors")
FrameworkJobReport.cmsRun1.section_("logs")
FrameworkJobReport.cmsRun1.section_("parameters")
FrameworkJobReport.cmsRun1.parameters.ReadBranches = ""

# No output modules; empty analysis and performance sections.
FrameworkJobReport.cmsRun1.outputModules = []
FrameworkJobReport.cmsRun1.section_("site")
FrameworkJobReport.cmsRun1.section_("analysis")
FrameworkJobReport.cmsRun1.analysis.section_("files")
FrameworkJobReport.cmsRun1.analysis.files.fileCount = 0
FrameworkJobReport.cmsRun1.section_("performance")