def testGetLargestValue(self):
    """
    _testGetLargestValue_

    See if we can get the largest value from a list of histograms
    for a specific key
    """
    # Renamed from 'l': PEP 8 (E741) flags 'l' as ambiguous with '1'/'I'
    histograms = [{'a': 102, 'b': 200, 'name': 'One'},
                  {'a': 101, 'b': 199, 'name': 'Two'},
                  {'a': 100, 'b': 198, 'name': 'Three'},
                  {'a': 103, 'b': 197, 'name': 'Four'}]

    # Top 2 entries ranked by key 'a', highest first
    result = MathAlgos.getLargestValues(dictList=histograms, key='a', n=2)
    self.assertEqual(result, [{'a': 103, 'b': 197, 'name': 'Four'},
                              {'a': 102, 'b': 200, 'name': 'One'}])

    # Top 3 entries ranked by key 'b', highest first
    result = MathAlgos.getLargestValues(dictList=histograms, key='b', n=3)
    self.assertEqual(result, [{'a': 102, 'b': 200, 'name': 'One'},
                              {'a': 101, 'b': 199, 'name': 'Two'},
                              {'a': 100, 'b': 198, 'name': 'Three'}])
def testGetLargestValue(self):
    """
    _testGetLargestValue_

    See if we can get the largest value from a list of histograms
    for a specific key
    """
    histogramList = [{'a': 102, 'b': 200, 'name': 'One'},
                     {'a': 101, 'b': 199, 'name': 'Two'},
                     {'a': 100, 'b': 198, 'name': 'Three'},
                     {'a': 103, 'b': 197, 'name': 'Four'}]

    # The two entries with the largest 'a' values, in descending order
    expectedByA = [histogramList[3], histogramList[0]]
    self.assertEqual(MathAlgos.getLargestValues(dictList=histogramList, key='a', n=2),
                     expectedByA)

    # The three entries with the largest 'b' values, in descending order
    expectedByB = [histogramList[0], histogramList[1], histogramList[2]]
    self.assertEqual(MathAlgos.getLargestValues(dictList=histogramList, key='b', n=3),
                     expectedByB)
    return
def handleCouchPerformance(self, workflowName):
    """
    _handleCouchPerformance_

    The couch performance stuff is convoluted enough I think I want
    to handle it separately.
    """
    perf = self.fwjrdatabase.loadView("FWJRDump", "performanceByWorkflowName",
                                      options={"startkey": [workflowName],
                                               "endkey": [workflowName]})['rows']

    taskList = {}
    # NOTE(review): finalTask is never populated in this block — looks
    # vestigial; confirm against the full file before removing.
    finalTask = {}

    # Group the raw view rows by task name, then by step name.
    for row in perf:
        taskName = row['value']['taskName']
        stepName = row['value']['stepName']
        # setdefault replaces the original "if not x in d.keys()" checks,
        # which built a full key list on every row under Python 2.
        taskList.setdefault(taskName, {}).setdefault(stepName, []).append(row['value'])

    for taskName in taskList:
        final = {}
        for stepName in taskList[taskName]:
            output = {'jobTime': []}
            final[stepName] = {}
            masterList = []

            # For each step put the data into a dictionary called output
            # keyed by the name of the value
            for row in taskList[taskName][stepName]:
                masterList.append(row)
                for key in row:
                    # Skip bookkeeping fields that are not numeric metrics.
                    if key in ['startTime', 'stopTime', 'taskName', 'stepName', 'jobID']:
                        continue
                    if key not in output:
                        output[key] = []
                    try:
                        output[key].append(float(row[key]))
                    except TypeError:
                        # Why do we get None values here?
                        # We may want to look into it
                        logging.debug("Got a None performance value for key %s" % key)
                        if row[key] is None:
                            # Treat a missing measurement as zero.
                            output[key].append(0.0)
                        else:
                            raise
                try:
                    # Wall-clock duration of the job; TypeError means one
                    # of the timestamps was None, so skip this row.
                    jobTime = row.get('stopTime', None) - row.get('startTime', None)
                    output['jobTime'].append(jobTime)
                    row['jobTime'] = jobTime
                except TypeError:
                    # One of those didn't have a real value
                    pass

            # Now that we've sorted the data, we process it one key at a time
            for key in output:
                final[stepName][key] = {}
                # Assemble the 'worstOffenders'
                # These are the top [self.nOffenders] in that particular category
                # i.e., those with the highest values
                offenders = MathAlgos.getLargestValues(dictList=masterList, key=key,
                                                       n=self.nOffenders)
                for x in offenders:
                    try:
                        logArchive = self.fwjrdatabase.loadView(
                            "FWJRDump", "logArchivesByJobID",
                            options={"startkey": [x['jobID']],
                                     "endkey": [x['jobID'],
                                                x['retry_count']]})['rows'][0]['value']['lfn']
                        logCollectID = self.jobsdatabase.loadView(
                            "JobDump", "jobsByInputLFN",
                            options={"startkey": [workflowName, logArchive],
                                     "endkey": [workflowName, logArchive]})['rows'][0]['value']
                        logCollect = self.fwjrdatabase.loadView(
                            "FWJRDump", "outputByJobID",
                            options={"startkey": logCollectID,
                                     "endkey": logCollectID})['rows'][0]['value']['lfn']
                        x['logArchive'] = logArchive.split('/')[-1]
                        x['logCollect'] = logCollect
                    except (IndexError, KeyError) as ex:
                        # Best effort only: a missing tarball just means less
                        # detail for this offender.  (Merged the two identical
                        # handlers; also 'except E, ex' was Python-2-only.)
                        logging.debug("Unable to find final logArchive tarball for %i"
                                      % x['jobID'])
                        logging.debug(str(ex))

                if key in self.histogramKeys:
                    histogram = MathAlgos.createHistogram(numList=output[key],
                                                          nBins=self.histogramBins,
                                                          limit=self.histogramLimit)
                    final[stepName][key]['histogram'] = histogram
                else:
                    average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                    final[stepName][key]['average'] = average
                    final[stepName][key]['stdDev'] = stdDev

                final[stepName][key]['worstOffenders'] = [{'jobID': x['jobID'],
                                                           'value': x.get(key, 0.0),
                                                           'log': x.get('logArchive', None),
                                                           'logCollect': x.get('logCollect', None)}
                                                          for x in offenders]
def handleCouchPerformance(self, workflowName):
    """
    _handleCouchPerformance_

    The couch performance stuff is convoluted enough I think I want
    to handle it separately.
    """
    perf = self.fwjrdatabase.loadView("FWJRDump", "performanceByWorkflowName",
                                      options={"startkey": [workflowName],
                                               "endkey": [workflowName]})['rows']
    failedJobs = self.getFailedJobs(workflowName)

    taskList = {}
    # NOTE(review): finalTask is never populated in this block — looks
    # vestigial; confirm against the full file before removing.
    finalTask = {}

    # Group the raw view rows by task name, then by step name.
    for row in perf:
        taskName = row['value']['taskName']
        stepName = row['value']['stepName']
        # setdefault replaces the original "if not x in d.keys()" checks,
        # which built a full key list on every row under Python 2.
        taskList.setdefault(taskName, {}).setdefault(stepName, []).append(row['value'])

    for taskName in taskList:
        final = {}
        for stepName in taskList[taskName]:
            output = {'jobTime': []}
            outputFailed = {'jobTime': []}  # This will be same, but only for failed jobs
            final[stepName] = {}
            masterList = []

            # For each step put the data into a dictionary called output
            # keyed by the name of the value
            for row in taskList[taskName][stepName]:
                masterList.append(row)
                for key in row:
                    # Skip bookkeeping fields that are not numeric metrics.
                    if key in ['startTime', 'stopTime', 'taskName', 'stepName', 'jobID']:
                        continue
                    if key not in output:
                        output[key] = []
                        if len(failedJobs) > 0:
                            outputFailed[key] = []
                    try:
                        output[key].append(float(row[key]))
                        if row['jobID'] in failedJobs:
                            outputFailed[key].append(float(row[key]))
                    except TypeError:
                        # Why do we get None values here?
                        # We may want to look into it
                        logging.debug("Got a None performance value for key %s" % key)
                        if row[key] is None:
                            # Treat a missing measurement as zero.
                            output[key].append(0.0)
                        else:
                            raise
                try:
                    # Wall-clock duration of the job; TypeError means one
                    # of the timestamps was None, so skip this row.
                    jobTime = row.get('stopTime', None) - row.get('startTime', None)
                    output['jobTime'].append(jobTime)
                    row['jobTime'] = jobTime
                    # Account job running time here only if the job has failed
                    if row['jobID'] in failedJobs:
                        outputFailed['jobTime'].append(jobTime)
                except TypeError:
                    # One of those didn't have a real value
                    pass

            # Now that we've sorted the data, we process it one key at a time
            for key in output:
                final[stepName][key] = {}
                # Assemble the 'worstOffenders'
                # These are the top [self.nOffenders] in that particular category
                # i.e., those with the highest values
                offenders = MathAlgos.getLargestValues(dictList=masterList, key=key,
                                                       n=self.nOffenders)
                for x in offenders:
                    try:
                        logArchive = self.fwjrdatabase.loadView(
                            "FWJRDump", "logArchivesByJobID",
                            options={"startkey": [x['jobID']],
                                     "endkey": [x['jobID'],
                                                x['retry_count']]})['rows'][0]['value']['lfn']
                        logCollectID = self.jobsdatabase.loadView(
                            "JobDump", "jobsByInputLFN",
                            options={"startkey": [workflowName, logArchive],
                                     "endkey": [workflowName, logArchive]})['rows'][0]['value']
                        logCollect = self.fwjrdatabase.loadView(
                            "FWJRDump", "outputByJobID",
                            options={"startkey": logCollectID,
                                     "endkey": logCollectID})['rows'][0]['value']['lfn']
                        x['logArchive'] = logArchive.split('/')[-1]
                        x['logCollect'] = logCollect
                    except (IndexError, KeyError) as ex:
                        # Best effort only: a missing tarball just means less
                        # detail for this offender.  (Merged the two identical
                        # handlers; also 'except E, ex' was Python-2-only.)
                        logging.debug("Unable to find final logArchive tarball for %i"
                                      % x['jobID'])
                        logging.debug(str(ex))

                if key in self.histogramKeys:
                    # Usual histogram that was always done
                    histogram = MathAlgos.createHistogram(numList=output[key],
                                                          nBins=self.histogramBins,
                                                          limit=self.histogramLimit)
                    final[stepName][key]['histogram'] = histogram
                    # Histogram only picking values from failed jobs
                    # Operators can use it to find out quicker why a
                    # workflow/task/step is failing
                    if len(failedJobs) > 0:
                        failedJobsHistogram = MathAlgos.createHistogram(
                            numList=outputFailed[key],
                            nBins=self.histogramBins,
                            limit=self.histogramLimit)
                        final[stepName][key]['errorsHistogram'] = failedJobsHistogram
                else:
                    average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                    final[stepName][key]['average'] = average
                    final[stepName][key]['stdDev'] = stdDev

                final[stepName][key]['worstOffenders'] = [{'jobID': x['jobID'],
                                                           'value': x.get(key, 0.0),
                                                           'log': x.get('logArchive', None),
                                                           'logCollect': x.get('logCollect', None)}
                                                          for x in offenders]
def handleCouchPerformance(self, workflowName):
    """
    _handleCouchPerformance_

    The couch performance stuff is convoluted enough I think I want
    to handle it separately.
    """
    perf = self.fwjrdatabase.loadView(
        "FWJRDump",
        "performanceByWorkflowName",
        options={"startkey": [workflowName], "endkey": [workflowName]},
    )["rows"]
    failedJobs = self.getFailedJobs(workflowName)

    taskList = {}
    # NOTE(review): finalTask is never populated in this block — looks
    # vestigial; confirm against the full file before removing.
    finalTask = {}

    # Group the raw view rows by task name, then by step name.
    for row in perf:
        taskName = row["value"]["taskName"]
        stepName = row["value"]["stepName"]
        # setdefault replaces the original "if not x in d.keys()" checks,
        # which built a full key list on every row under Python 2.
        taskList.setdefault(taskName, {}).setdefault(stepName, []).append(row["value"])

    for taskName in taskList:
        final = {}
        for stepName in taskList[taskName]:
            output = {"jobTime": []}
            outputFailed = {"jobTime": []}  # This will be same, but only for failed jobs
            final[stepName] = {}
            masterList = []

            # For each step put the data into a dictionary called output
            # keyed by the name of the value
            for row in taskList[taskName][stepName]:
                masterList.append(row)
                for key in row:
                    # Skip bookkeeping fields that are not numeric metrics.
                    if key in ["startTime", "stopTime", "taskName", "stepName", "jobID"]:
                        continue
                    if key not in output:
                        output[key] = []
                        if len(failedJobs) > 0:
                            outputFailed[key] = []
                    try:
                        output[key].append(float(row[key]))
                        if row["jobID"] in failedJobs:
                            outputFailed[key].append(float(row[key]))
                    except TypeError:
                        # Why do we get None values here?
                        # We may want to look into it
                        logging.debug("Got a None performance value for key %s" % key)
                        if row[key] is None:
                            # Treat a missing measurement as zero.
                            output[key].append(0.0)
                        else:
                            raise
                try:
                    # Wall-clock duration of the job; TypeError means one
                    # of the timestamps was None, so skip this row.
                    jobTime = row.get("stopTime", None) - row.get("startTime", None)
                    output["jobTime"].append(jobTime)
                    row["jobTime"] = jobTime
                    # Account job running time here only if the job has failed
                    if row["jobID"] in failedJobs:
                        outputFailed["jobTime"].append(jobTime)
                except TypeError:
                    # One of those didn't have a real value
                    pass

            # Now that we've sorted the data, we process it one key at a time
            for key in output:
                final[stepName][key] = {}
                # Assemble the 'worstOffenders'
                # These are the top [self.nOffenders] in that particular category
                # i.e., those with the highest values
                offenders = MathAlgos.getLargestValues(
                    dictList=masterList, key=key, n=self.nOffenders
                )
                for x in offenders:
                    try:
                        logArchive = self.fwjrdatabase.loadView(
                            "FWJRDump",
                            "logArchivesByJobID",
                            options={
                                "startkey": [x["jobID"]],
                                "endkey": [x["jobID"], x["retry_count"]],
                            },
                        )["rows"][0]["value"]["lfn"]
                        logCollectID = self.jobsdatabase.loadView(
                            "JobDump",
                            "jobsByInputLFN",
                            options={
                                "startkey": [workflowName, logArchive],
                                "endkey": [workflowName, logArchive],
                            },
                        )["rows"][0]["value"]
                        logCollect = self.fwjrdatabase.loadView(
                            "FWJRDump",
                            "outputByJobID",
                            options={"startkey": logCollectID, "endkey": logCollectID},
                        )["rows"][0]["value"]["lfn"]
                        x["logArchive"] = logArchive.split("/")[-1]
                        x["logCollect"] = logCollect
                    except (IndexError, KeyError) as ex:
                        # Best effort only: a missing tarball just means less
                        # detail for this offender.  (Merged the two identical
                        # handlers; also 'except E, ex' was Python-2-only.)
                        logging.debug(
                            "Unable to find final logArchive tarball for %i" % x["jobID"]
                        )
                        logging.debug(str(ex))

                if key in self.histogramKeys:
                    # Usual histogram that was always done
                    histogram = MathAlgos.createHistogram(
                        numList=output[key],
                        nBins=self.histogramBins,
                        limit=self.histogramLimit,
                    )
                    final[stepName][key]["histogram"] = histogram
                    # Histogram only picking values from failed jobs
                    # Operators can use it to find out quicker why a
                    # workflow/task/step is failing
                    if len(failedJobs) > 0:
                        failedJobsHistogram = MathAlgos.createHistogram(
                            numList=outputFailed[key],
                            nBins=self.histogramBins,
                            limit=self.histogramLimit,
                        )
                        final[stepName][key]["errorsHistogram"] = failedJobsHistogram
                else:
                    average, stdDev = MathAlgos.getAverageStdDev(numList=output[key])
                    final[stepName][key]["average"] = average
                    final[stepName][key]["stdDev"] = stdDev

                final[stepName][key]["worstOffenders"] = [
                    {
                        "jobID": x["jobID"],
                        "value": x.get(key, 0.0),
                        "log": x.get("logArchive", None),
                        "logCollect": x.get("logCollect", None),
                    }
                    for x in offenders
                ]