def results(self):
    """Summarize per-node load-average data across the job.

    Returns a dict with "mean" and "max" stats over all nodes that have
    data, plus "meanpercore"/"maxpercore" when hardware inventory (hinv)
    supplies a core count for the node. Returns an INSUFFICIENT_DATA
    error dict when no node produced any datapoints.
    """
    meanval = []
    maxval = []
    meanvalpercore = []
    maxvalpercore = []
    hinv = self._job.getdata('hinv')
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for nodename, loaddata in self._data.items():
        if loaddata.count() > 0:
            meanval.append(loaddata.mean())
            maxval.append(loaddata.max)
            # Per-core normalization only possible when hinv knows the node
            if hinv is not None and nodename in hinv:
                meanvalpercore.append(loaddata.mean() / hinv[nodename]['cores'])
                maxvalpercore.append(loaddata.max / hinv[nodename]['cores'])

    if not meanval:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    results = {
        "mean": calculate_stats(meanval),
        "max": calculate_stats(maxval)
    }
    if meanvalpercore:
        results['meanpercore'] = calculate_stats(meanvalpercore)
        results['maxpercore'] = calculate_stats(maxvalpercore)
    return results
def results(self):
    """Compute per-core FLOPs and cycles-per-instruction statistics.

    Supports Nehalem-style (NHM_METRICS) and Sandy Bridge-style
    (SNB_METRICS) counter layouts; the metric count identifies the
    layout. Returns an error dict on a recorded processing error, when
    no host data exists, or when a host's counter set matches neither
    layout.
    """
    if self._error is not None:
        return {"error": self._error}

    nhosts = len(self._data)
    if nhosts < 1:
        return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}

    flops = numpy.zeros(self._totalcores)
    cpiref = numpy.zeros(self._totalcores)
    cpldref = numpy.zeros(self._totalcores)

    coreindex = 0
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for _, data in self._data.items():
        ncores = len(data[0])  # one entry per core on this host
        if len(data) == len(NHM_METRICS):
            flops[coreindex:coreindex + ncores] = 1.0 * data[3]
            cpiref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[1]
            cpldref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[2]
            coreindex += ncores
        elif len(data) == len(SNB_METRICS):
            # SNB counts FLOPs per instruction width: packed DP (x4),
            # scalar/SSE combos weighted accordingly
            flops[coreindex:coreindex + ncores] = (
                4.0 * data[3] + 2.0 * data[4] + 1.0 * data[5] + 1.0 * data[6])
            cpiref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[1]
            cpldref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[2]
            coreindex += ncores
        else:
            return {"error": ProcessingError.INSUFFICIENT_DATA}

    return {
        "flops": calculate_stats(flops),
        "cpiref": calculate_stats(cpiref),
        "cpldref": calculate_stats(cpldref)
    }
def results(self):
    """Summarize cgroup memory usage, limit and usage ratio per node.

    Requires data for every node in the job and a majority of present
    (vs missing) host counts per host; otherwise returns the
    corresponding error dict.
    """
    if len(self._data) != self._job.nodecount:
        return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}

    # If more datapoints were missing than present for a host, the
    # cpuset assignment is unreliable
    for hoststat in self._hostcounts.values():
        if hoststat['missing'] > hoststat['present']:
            return {"error": ProcessingError.CPUSET_UNKNOWN}

    stats = {
        "usage": {"avg": [], "max": []},
        "limit": [],
        "usageratio": {"avg": [], "max": []}
    }
    datapoints = 0
    # memdata: [0]=usage timeseries, [1]=limit, [2]=usage ratio
    for memdata in self._data.values():
        if memdata[0].count() > 0:
            datapoints += 1
            stats["usage"]["avg"].append(memdata[0].mean())
            stats["usage"]["max"].append(memdata[0].max)
            stats["limit"].append(memdata[1].max)
            stats["usageratio"]["avg"].append(memdata[2].mean())
            stats["usageratio"]["max"].append(memdata[2].max)

    if datapoints == 0:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    result = {"usage": {}, "usageratio": {}}
    result["usage"]["avg"] = calculate_stats(stats["usage"]["avg"])
    result["usage"]["max"] = calculate_stats(stats["usage"]["max"])
    result["limit"] = calculate_stats(stats["limit"])
    result["usageratio"]["avg"] = calculate_stats(stats["usageratio"]["avg"])
    result["usageratio"]["max"] = calculate_stats(stats["usageratio"]["max"])
    return result
def results(self):
    """Aggregate per-section metric averages across nodes.

    Splits each node's timeseries into self.SECTIONS sections, computes
    the last section's average here (earlier sections are assumed to be
    in node['section_avgs'] already — TODO confirm against the process()
    path), then reports per-section stats across nodes for each metric.
    Nodes with too few datapoints are flagged with data_error and
    excluded; a metric with fewer than self.MIN_NODES usable nodes is
    replaced by an INSUFFICIENT_DATA error entry.
    """
    if self.end_time - self.start_time < self.MIN_WALLTIME:
        return {'error': ProcessingError.JOB_TOO_SHORT}

    metric_data = {
        metric: {
            # Store data for each node (inner array), for each section (outer array)
            'sections': [[] for _ in range(self.SECTIONS)],
            'nodes_used': 0
        }
        for metric in self.metricNames
    }

    for nodename, node in iteritems(self.nodes):
        # Not enough samples on this node — mark and skip
        if not len(node['all_times']) > self.DATAPOINT_THRESHOLD:
            node['data_error'] = True
            continue

        # NOTE(review): iterating node['last_value'] directly and
        # unpacking (metric, data) suggests it holds pairs; if it is a
        # plain dict this should be iteritems(node['last_value']) —
        # confirm against where last_value is populated.
        for metric, data in node['last_value']:
            # calculate last section
            avg = (data - node['section_start_data'][metric]) / (
                self.end_time - node['section_start_timestamp'])
            node['section_avgs'][metric].append(avg)

            # only use nodes which have enough data
            if len(node['section_avgs']
                   [metric]) == self.SECTIONS and not node['data_error']:
                metric_data[metric]['nodes_used'] += 1
                # add node to the section aggregates
                for i in range(self.SECTIONS):
                    metric_data[metric]['sections'][i].append(
                        node['section_avgs'][metric][i])

    for metric_name, metric in iteritems(metric_data):
        # If a metric didn't have enough viable nodes, report error due to insufficient data
        if metric['nodes_used'] < self.MIN_NODES:
            metric_data[metric_name] = {
                'error': ProcessingError.INSUFFICIENT_DATA
            }
            continue

        # Use stats across the nodes instead of reporting all node data individually
        metric['sections'] = [
            calculate_stats(nodes) for nodes in metric['sections']
        ]
        metric['section_start_timestamps'] = [
            calculate_stats(sect) for sect in self.section_start_timestamps
        ]

        if _HAS_AUTOPERIOD:
            # TODO: Decide on error message to have otherwise
            metric['autoperiod'] = _calculate_autoperiod(
                self.nodes, metric_name, self.resource, self.jobid)

    return metric_data
def _calculate_autoperiod(nodes, metric, resource, jobid):
    """Detect periodicity in a metric summed across all usable nodes.

    Resamples each node's series onto a uniform time grid (taken from
    the first node without data_error — assumes all nodes cover a
    similar time range, TODO confirm), sums the interpolated values,
    and runs Autoperiod on the rates. Returns None when all values are
    ~zero or no period is found; otherwise a dict with the period,
    phase-shift guess, on/off-period block-area stats and a normalized
    score in [-1, 1] comparing on-period vs off-period area.
    """
    times_interp = None
    summed_values = None

    # Interpolate times and values so sampling interval is constant, and sum nodes
    for nodename, node in iteritems(nodes):
        if node['data_error']:
            continue
        if times_interp is None:
            # First usable node defines the common sampling grid
            times_interp = np.linspace(min(node['all_times']),
                                       max(node['all_times']),
                                       len(node['all_times']))
            summed_values = np.interp(times_interp, node['all_times'],
                                      node['all_data'][metric])
        else:
            # NOTE(review): summed_values is always set together with
            # times_interp above, so this None check appears unreachable
            if summed_values is None:
                summed_values = np.interp(times_interp, node['all_times'],
                                          node['all_data'][metric])
            else:
                summed_values += np.interp(times_interp, node['all_times'],
                                           node['all_data'][metric])

    # An all-zero signal has no meaningful period; skip Autoperiod entirely
    autoperiod = Autoperiod(
        *convert_to_rates(times_interp, summed_values),
        threshold_method='stat') if not np.allclose(summed_values, 0) else None

    if autoperiod is None or autoperiod.period is None:
        return None
    else:
        ap_data = {
            "period": autoperiod.period,
            "phase_shift_guess": autoperiod.phase_shift_guess
        }
        on_period_block_areas, off_period_block_areas = autoperiod.period_block_areas(
        )
        on_period = calculate_stats(on_period_block_areas)
        off_period = calculate_stats(off_period_block_areas)
        on_period['sum'] = np.sum(on_period_block_areas)
        off_period['sum'] = np.sum(off_period_block_areas)
        ap_data['on_period'] = on_period
        ap_data['off_period'] = off_period
        # +1 = all activity in on-periods, -1 = all in off-periods
        normalized_score = (on_period['sum'] - off_period['sum']) / (
            on_period['sum'] + off_period['sum'])
        ap_data['normalized_score'] = normalized_score
        return ap_data
def results(self):
    """Summarize per-device statistics across all hosts.

    For each device name seen on any host, collects the per-device mean
    and max of every statistic in self.statnames, then reduces each list
    with calculate_stats(). Returns {'error': 'no data'} when no device
    was seen.
    """
    result = {}
    # .values()/.items() work on both Python 2 and 3
    for data in self._data.values():
        for i, devicename in enumerate(data['names']):
            if devicename not in result:
                # First sighting of this device: create empty stat lists
                result[devicename] = {}
                for statname in self.statnames:
                    result[devicename][statname] = []
                    result[devicename][statname + "max"] = []

            for statname in self.statnames:
                result[devicename][statname].append(
                    data[statname].mean()[i])
                result[devicename][statname + "max"].append(
                    data[statname].max[i])

    output = {}
    for device, data in result.items():
        output[device] = {}
        for statname, datalist in data.items():
            output[device][statname] = calculate_stats(datalist)

    if not output:
        output['error'] = "no data"
    return output
def computejobcpus(self):
    """ stats for the cores on the nodes that were assigned to the job (if available)

    Restricts the per-core CPU time ratios to the cpusallowed set for
    each host. Returns a CPUSET_UNKNOWN error dict when proc data or a
    usable cpuset is missing for any host.
    """
    proc = self._job.getdata('proc')

    if proc is None:
        return {"error": ProcessingError.CPUSET_UNKNOWN}

    # Reuse the already-fetched proc data instead of calling getdata again
    cpusallowed = proc['cpusallowed']

    ratios = numpy.empty((self._ncpumetrics, self._totalcores), numpy.double)

    coreindex = 0
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for host, last in self._last.items():
        elapsed = last - self._first[host]
        if host in cpusallowed and 'error' not in cpusallowed[host]:
            # Keep only the columns (cores) assigned to the job
            elapsed = elapsed[:, cpusallowed[host]]
        else:
            return {"error": ProcessingError.CPUSET_UNKNOWN}

        coresperhost = len(elapsed[0, :])
        # Normalize each metric by the total elapsed across metrics
        ratios[:, coreindex:(coreindex + coresperhost)] = 1.0 * elapsed / numpy.sum(elapsed, 0)
        coreindex += coresperhost

    results = {}
    for i, name in enumerate(self._outnames):
        results[name] = calculate_stats(ratios[i, :coreindex])

    results['all'] = {"cnt": coreindex}
    return results
def computejobcpus(self):
    """ stats for the cores on the nodes that were assigned to the job (if available)

    Restricts the per-core CPU time ratios to the cpusallowed set for
    each host. Returns a CPUSET_UNKNOWN error dict when proc data or a
    usable cpuset is missing for any host.
    """
    proc = self._job.getdata('proc')

    if proc is None:
        return {"error": ProcessingError.CPUSET_UNKNOWN}

    # Reuse the already-fetched proc data instead of calling getdata again
    cpusallowed = proc['cpusallowed']

    ratios = numpy.empty((self._ncpumetrics, self._totalcores), numpy.double)

    coreindex = 0
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for host, last in self._last.items():
        elapsed = last - self._first[host]
        if host in cpusallowed and 'error' not in cpusallowed[host]:
            # Keep only the columns (cores) assigned to the job
            elapsed = elapsed[:, cpusallowed[host]]
        else:
            return {"error": ProcessingError.CPUSET_UNKNOWN}

        coresperhost = len(elapsed[0, :])
        # Normalize each metric by the total elapsed across metrics
        ratios[:, coreindex:(
            coreindex + coresperhost)] = 1.0 * elapsed / numpy.sum(elapsed, 0)
        coreindex += coresperhost

    results = {}
    for i, name in enumerate(self._outnames):
        results[name] = calculate_stats(ratios[i, :coreindex])

    results['all'] = {"cnt": coreindex}
    return results
def computeallcpus(self):
    """ overall stats for all cores on the nodes

    Computes per-core CPU time ratios (each metric divided by the total
    elapsed across metrics) over every core of every host. Returns an
    error dict when the job is too short or counter data is malformed.
    """
    ratios = numpy.empty((self._ncpumetrics, self._totalcores), numpy.double)

    coreindex = 0
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for host, last in self._last.items():
        try:
            elapsed = last - self._first[host]
            if numpy.amin(numpy.sum(elapsed, 0)) < 1.0:
                # typically happens if the job was very short and the datapoints are too close together
                return {"error": ProcessingError.JOB_TOO_SHORT}
            coresperhost = len(last[0, :])
            ratios[:, coreindex:(
                coreindex + coresperhost)] = 1.0 * elapsed / numpy.sum(elapsed, 0)
            coreindex += coresperhost
        except ValueError:
            # typically happens if the linux pmda crashes during the job
            return {"error": ProcessingError.INSUFFICIENT_DATA}

    results = {}
    for i, name in enumerate(self._outnames):
        results[name] = calculate_stats(ratios[i, :])

    results['all'] = {"cnt": self._totalcores}
    return results
def results(self):
    """Summarize GPU utilization and memory statistics per device.

    Collects the mean of gpuactive/memused/memactive for each device
    across hosts and reduces each list with calculate_stats(). Returns
    {'error': 'no data'} when no device was seen.
    """
    result = {}
    # .values()/.items() work on both Python 2 and 3
    for data in self._data.values():
        for i, devicename in enumerate(data['names']):
            if devicename not in result:
                result[devicename] = {
                    'gpuactive': [],
                    'memused': [],
                    'memactive': []
                }
            for statname in ['gpuactive', 'memused', 'memactive']:
                result[devicename][statname].append(
                    data[statname].mean()[i])

    output = {}
    for device, data in result.items():
        output[device] = {}
        for statname, datalist in data.items():
            output[device][statname] = calculate_stats(datalist)

    if not output:
        output['error'] = "no data"
    return output
def computeallcpus(self):
    """ overall stats for all cores on the nodes

    Computes per-core CPU time ratios (each metric divided by the total
    elapsed across metrics) over every core of every host. Returns an
    error dict when the job is too short or counter data is malformed.
    """
    ratios = numpy.empty((self._ncpumetrics, self._totalcores), numpy.double)

    coreindex = 0
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for host, last in self._last.items():
        try:
            elapsed = last - self._first[host]
            if numpy.amin(numpy.sum(elapsed, 0)) < 1.0:
                # typically happens if the job was very short and the datapoints are too close together
                return {"error": ProcessingError.JOB_TOO_SHORT}
            coresperhost = len(last[0, :])
            ratios[:, coreindex:(coreindex + coresperhost)] = 1.0 * elapsed / numpy.sum(elapsed, 0)
            coreindex += coresperhost
        except ValueError:
            # typically happens if the linux pmda crashes during the job
            return {"error": ProcessingError.INSUFFICIENT_DATA}

    results = {}
    for i, name in enumerate(self._outnames):
        results[name] = calculate_stats(ratios[i, :])

    results['all'] = {"cnt": self._totalcores}
    return results
def results(self):
    """Summarize per-core memory usage across hosts.

    Normalizes each host's mean used / used-minus-cache memory by its
    CPU count. Returns an error dict when a host's CPU count is missing
    or no host produced usage data.
    """
    memused = []
    memusedminus = []
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for hostidx, memdata in self._data.items():
        if hostidx not in self._hostcpucounts:
            return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}
        if memdata['used'].count() > 0:
            memused.append(memdata['used'].mean() / self._hostcpucounts[hostidx])
        if memdata['usedminus'].count() > 0:
            memusedminus.append(memdata['usedminus'].mean() / self._hostcpucounts[hostidx])

    if not memused:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    # NOTE(review): memusedminus may be empty here while memused is not;
    # calculate_stats is assumed to handle an empty list — confirm.
    return {
        "used": calculate_stats(memused),
        "used_minus_cache": calculate_stats(memusedminus)
    }
def results(self):
    """Return summary statistics for each network-interface metric column."""
    column_names = ('drop', 'recv', 'send', 'drop_count', 'recv_count',
                    'send_count')
    # Only rows up to _hostidx contain valid host data
    hostrows = self._data[:self._hostidx]
    return {
        name: calculate_stats(hostrows[:, col])
        for col, name in enumerate(column_names)
    }
def results(self):
    """Summarize cgroup memory usage, limit and usage ratio per node.

    Requires data for every node in the job and a majority of present
    (vs missing) host counts per host; otherwise returns the
    corresponding error dict.
    """
    if len(self._data) != self._job.nodecount:
        return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}

    # If more datapoints were missing than present for a host, the
    # cpuset assignment is unreliable
    for hoststat in self._hostcounts.values():
        if hoststat['missing'] > hoststat['present']:
            return {"error": ProcessingError.CPUSET_UNKNOWN}

    stats = {
        "usage": {
            "avg": [],
            "max": []
        },
        "limit": [],
        "usageratio": {
            "avg": [],
            "max": []
        }
    }
    datapoints = 0
    # memdata: [0]=usage timeseries, [1]=limit, [2]=usage ratio
    for memdata in self._data.values():
        if memdata[0].count() > 0:
            datapoints += 1
            stats["usage"]["avg"].append(memdata[0].mean())
            stats["usage"]["max"].append(memdata[0].max)
            stats["limit"].append(memdata[1].max)
            stats["usageratio"]["avg"].append(memdata[2].mean())
            stats["usageratio"]["max"].append(memdata[2].max)

    if datapoints == 0:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    result = {"usage": {}, "usageratio": {}}
    result["usage"]["avg"] = calculate_stats(stats["usage"]["avg"])
    result["usage"]["max"] = calculate_stats(stats["usage"]["max"])
    result["limit"] = calculate_stats(stats["limit"])
    result["usageratio"]["avg"] = calculate_stats(
        stats["usageratio"]["avg"])
    result["usageratio"]["max"] = calculate_stats(
        stats["usageratio"]["max"])
    return result
def results(self):
    """Return summary statistics for each network-interface metric column."""
    metric_columns = ('drop', 'recv', 'send', 'drop_count', 'recv_count',
                      'send_count')
    summary = {}
    # Only rows up to _hostidx contain valid host data
    for column, label in enumerate(metric_columns):
        summary[label] = calculate_stats(self._data[:self._hostidx, column])
    return summary
def results(self):
    """Summarize per-core memory usage across hosts.

    Normalizes each host's mean used / used-minus-cache memory by its
    CPU count. Returns an error dict when a host's CPU count is missing
    or no host produced usage data.
    """
    memused = []
    memusedminus = []
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for hostidx, memdata in self._data.items():
        if hostidx not in self._hostcpucounts:
            return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}
        if memdata['used'].count() > 0:
            memused.append(memdata['used'].mean() /
                           self._hostcpucounts[hostidx])
        if memdata['usedminus'].count() > 0:
            memusedminus.append(memdata['usedminus'].mean() /
                                self._hostcpucounts[hostidx])

    if not memused:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    # NOTE(review): memusedminus may be empty here while memused is not;
    # calculate_stats is assumed to handle an empty list — confirm.
    return {
        "used": calculate_stats(memused),
        "used_minus_cache": calculate_stats(memusedminus)
    }
def results(self):
    """Summarize power and energy statistics per device across hosts.

    Hosts whose power series has no datapoints are skipped. Returns an
    INSUFFICIENT_DATA error dict when no device produced any data.
    The reported energy 'total' is reconstructed as avg * cnt from the
    calculated stats.
    """
    result = {}
    # .values()/.items() work on both Python 2 and 3
    for data in self._data.values():
        if data['power'].count() < 1:
            continue
        for i, devicename in enumerate(data['names']):
            if devicename not in result:
                result[devicename] = {
                    "meanpower": [],
                    "maxpower": [],
                    "energy": []
                }
            result[devicename]["meanpower"].append(data['power'].mean()[i])
            result[devicename]["maxpower"].append(data['power'].max[i])
            result[devicename]["energy"].append(data['energy'].total[i])

    if not result:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    output = {}
    for device, data in result.items():
        output[device] = {
            "power": {
                "mean": calculate_stats(data['meanpower']),
                "max": calculate_stats(data['maxpower'])
            },
            "energy": calculate_stats(data['energy'])
        }
        # Total energy across hosts = per-host average * host count
        output[device]['energy']['total'] = output[device]['energy'][
            'avg'] * output[device]['energy']['cnt']
    return output
def results(self):
    """Summarize collected metrics keyed by a prettified metric name.

    Strips the leading namespace component of each metric name and joins
    the rest with dashes (e.g. "a.b.c" -> "b-c"). Returns an error dict
    on a recorded error or when no data was collected.
    """
    if self._error is not None:
        return {"error": self._error}

    if not self._data:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    output = {}
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for metricname, metric in self._data.items():
        prettyname = "-".join(metricname.split(".")[1:])
        output[prettyname] = calculate_stats(metric)
    return output
def results(self):
    """Summarize per-device metrics keyed by cleaned device and metric names.

    Device names have dots replaced by dashes; metric names drop the
    first two namespace components and join the rest with dashes.
    Returns an INSUFFICIENT_DATA error dict when no data was collected.
    """
    if not self._data:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    output = {}
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for devicename, device in self._data.items():
        # Dots would collide with the metric-name separator downstream
        cleandevname = devicename.replace(".", "-")
        output[cleandevname] = {}
        for metricname, metric in device.items():
            prettyname = "-".join(metricname.split(".")[2:])
            output[cleandevname][prettyname] = calculate_stats(metric)
    return output
def results(self):
    """Summarize node power and total energy for the job.

    Returns RAW_COUNTER_UNAVAILABLE when total energy is effectively
    zero, and INSUFFICIENT_DATA when the energy samples cover less than
    90% of nodecount * walltime or no power data exists at all.
    """
    meanpower = []
    maxpower = []
    energy = []
    time_covered = 0

    # .values() works on both Python 2 and 3 (itervalues() is Py2-only)
    for pdata in self._data.values():
        if pdata['power'].count() > 0:
            meanpower.append(pdata['power'].mean())
            maxpower.append(pdata['power'].max)
            energy.append(pdata['energy'].total)
            time_covered += pdata['energy'].elapsed

    total_energy = numpy.sum(energy)
    if total_energy < numpy.finfo(numpy.float64).eps:
        # Counter present but never incremented
        return {"error": ProcessingError.RAW_COUNTER_UNAVAILABLE}

    # Require coverage of at least 90% of the job's node-time
    if time_covered < 0.9 * self._job.nodecount * self._job.walltime:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    if not meanpower:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    energy_stats = calculate_stats(energy)
    energy_stats['total'] = total_energy

    return {
        "power": {
            "mean": calculate_stats(meanpower),
            "max": calculate_stats(maxpower)
        },
        "energy": energy_stats
    }
def results(self):
    """Summarize memory bandwidth across hosts.

    Each host's counter value is scaled by the 64-byte cache-line size
    to get bytes of bandwidth. Returns an error dict on a recorded
    error or when no host data exists.
    """
    if self._error is not None:
        return {"error": self._error}

    nhosts = len(self._data)
    if nhosts < 1:
        # NOTE(review): sibling collectors return INSUFFICIENT_HOSTDATA
        # for this condition — confirm whether this difference is intended
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    membw = numpy.zeros(nhosts)
    # .values() works on both Python 2 and 3 (itervalues() is Py2-only)
    for hostindex, data in enumerate(self._data.values()):
        # Scale transfer count by 64-byte cache line size
        membw[hostindex] = data * 64.0

    return {"membw": calculate_stats(membw)}
def results(self):
    """Summarize per-device metrics keyed by cleaned device and metric names.

    Device names have dots replaced by dashes; metric names drop the
    first two namespace components and join the rest with dashes.
    Returns an error dict on a recorded error or when no data exists.
    """
    if self._error is not None:
        return {"error": self._error}

    if not self._data:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    output = {}
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for devicename, device in self._data.items():
        # Dots would collide with the metric-name separator downstream
        cleandevname = devicename.replace(".", "-")
        output[cleandevname] = {}
        for metricname, metric in device.items():
            prettyname = "-".join(metricname.split(".")[2:])
            output[cleandevname][prettyname] = calculate_stats(metric)
    return output
def results(self):
    """Summarize GPU utilization and memory statistics per device.

    Collects the mean of gpuactive/memused/memactive for each device
    across hosts and reduces each list with calculate_stats(). Returns
    {'error': 'no data'} when no device was seen.
    """
    result = {}
    # .values()/.items() work on both Python 2 and 3
    for data in self._data.values():
        for i, devicename in enumerate(data['names']):
            if devicename not in result:
                result[devicename] = {'gpuactive': [], 'memused': [], 'memactive': []}
            for statname in ['gpuactive', 'memused', 'memactive']:
                result[devicename][statname].append(data[statname].mean()[i])

    output = {}
    for device, data in result.items():
        output[device] = {}
        for statname, datalist in data.items():
            output[device][statname] = calculate_stats(datalist)

    if not output:
        output['error'] = "no data"
    return output
def results(self):
    """Summarize node memory usage derived from free/cached/physmem data.

    used = physmem - free (mean and max variants); used_minus_cache =
    physmem - cached, reported only when cached data is available.
    Returns INSUFFICIENT_DATA when no host had both physmem and free
    data.
    """
    memused = []
    memusedminus = []
    maxmemused = []
    maxmemusedminus = []
    memfree = []
    maxmemfree = []
    physmem = []

    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for hostidx, memdata in self._data.items():
        if memdata['free'].count() > 0:
            memfree.append(memdata['free'].mean())
            maxmemfree.append(memdata['free'].max)
            if memdata['physmem'] is not None:
                # Max usage corresponds to minimum observed free memory
                memused.append(memdata['physmem'] - memdata['free'].mean())
                maxmemused.append(memdata['physmem'] - memdata['free'].min)
                physmem.append(memdata['physmem'])
            if memdata['cached'] is not None:
                memusedminus.append(memdata['physmem'] - memdata['cached'].mean())
                maxmemusedminus.append(memdata['physmem'] - memdata['cached'].min)

    if not memused:
        return {"error": ProcessingError.INSUFFICIENT_DATA}

    result = {
        "used": calculate_stats(memused),
        "maxused": calculate_stats(maxmemused),
        "free": calculate_stats(memfree),
        "physmem": calculate_stats(physmem),
        "maxfree": calculate_stats(maxmemfree)
    }

    if memusedminus:
        result['used_minus_cache'] = calculate_stats(memusedminus)
        result['maxused_minus_cache'] = calculate_stats(maxmemusedminus)

    return result
def results(self):
    """Compute per-core FLOPs, CPI, CPLD and clock statistics.

    Dispatches on the counter-set length to support NHM (also AMD
    Interlagos), SNB, generic Intel, and generic Intel ALT/ALT2
    layouts. FLOPs and CPLD are omitted/flagged when the layout lacks
    the needed counters; NaN ratios are reported as
    RAW_COUNTER_UNAVAILABLE rather than stats.
    """
    if self._error is not None:
        return {"error": self._error}

    nhosts = len(self._data)
    if nhosts < 1:
        return {"error": ProcessingError.INSUFFICIENT_HOSTDATA}

    hasFlops = True
    hasCpld = True
    clks = numpy.zeros(self._totalcores)
    flops = numpy.zeros(self._totalcores)
    cpiref = numpy.zeros(self._totalcores)
    cpldref = numpy.zeros(self._totalcores)

    coreindex = 0
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for _, data in self._data.items():
        ncores = len(data[0])  # one entry per core on this host
        if len(data) == len(NHM_METRICS):  # also covers the AMD_INTERLAGOS
            flops[coreindex:coreindex + ncores] = 1.0 * data[3]
            cpiref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[1]
            cpldref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[2]
            # 1232896.0 converts raw clock ticks to MHz — presumably
            # derived from the sampling configuration; TODO confirm
            clks[coreindex:coreindex + ncores] = data[0] / 1232896.0
            coreindex += ncores
        elif len(data) == len(SNB_METRICS):
            # SNB counts FLOPs per instruction width (packed x4, etc.)
            flops[coreindex:coreindex + ncores] = (
                4.0 * data[3] + 2.0 * data[4] + 1.0 * data[5] + 1.0 * data[6])
            cpiref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[1]
            cpldref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[2]
            clks[coreindex:coreindex + ncores] = data[0] / 1232896.0
            coreindex += ncores
        elif len(data) == len(GENERIC_INTEL_METRICS):
            # No FLOP counters in this layout
            hasFlops = False
            cpiref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[1]
            cpldref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[2]
            clks[coreindex:coreindex + ncores] = data[0] / 1232896.0
            coreindex += ncores
        elif len(data) == len(GENERIC_INTEL_ALT_METRICS):  # also covers the ALT2 variant
            # No FLOP or load counters in this layout
            hasFlops = False
            hasCpld = False
            cpiref[coreindex:coreindex + ncores] = 1.0 * data[0] / data[1]
            clks[coreindex:coreindex + ncores] = data[0] / 1232896.0
            coreindex += ncores
        else:
            return {"error": ProcessingError.INSUFFICIENT_DATA}

    results = {}
    if hasFlops:
        results['flops'] = calculate_stats(flops)

    if numpy.isnan(cpiref).any():
        results['cpiref'] = {
            "error": ProcessingError.RAW_COUNTER_UNAVAILABLE
        }
    else:
        results['cpiref'] = calculate_stats(cpiref)

    # Original source was broken across a line here ("or" with no
    # continuation); rejoined into a single condition.
    if (not hasCpld) or numpy.isnan(cpldref).any():
        results['cpldref'] = {
            "error": ProcessingError.RAW_COUNTER_UNAVAILABLE
        }
    else:
        results['cpldref'] = calculate_stats(cpldref)

    results['clk_mhz'] = calculate_stats(clks)
    return results
def results(self):
    """Return summary statistics for the collected core counts."""
    core_stats = calculate_stats(self.cores)
    return {"cores": core_stats}