def _getTransmitReceive():
    """
    Read in the /dev/bifrost ProcLog data and return block-level information
    about udp* blocks.

    The returned dictionary is keyed by '<pid>-<block name>'.  Each value is a
    dictionary holding 'pid', 'name', 'time' (the time.time() value taken just
    before the block's 'stats' log was examined), and the counters 'good',
    'missing', 'invalid', 'late', and 'nvalid'.  If the 'stats' log or any one
    of its counters is missing, all five counters are reported as zero.
    """
    
    ## Entries of the 'stats' log, in the order they are unpacked below
    counter_keys = ('ngood_bytes', 'nmissing_bytes', 'ninvalid_bytes',
                    'nlate_bytes', 'nvalid')
    
    blockList = {}
    ## Walk all running processes in sorted directory order
    for pid_path in sorted(glob.glob(os.path.join(BIFROST_STATS_BASE_DIR, '*'))):
        pid = int(os.path.basename(pid_path), 10)
        proclog = load_by_pid(pid)
        
        for name in proclog.keys():
            ## Only the udp* blocks are reported
            if not name.startswith('udp'):
                continue
                
            stamp = time.time()
            try:
                stats = proclog[name]['stats']
                counters = [stats[key] for key in counter_keys]
            except KeyError:
                counters = [0, 0, 0, 0, 0]
            good, missing, invalid, late, nvalid = counters
            
            entry = {}
            entry['pid'] = pid
            entry['name'] = name
            entry['time'] = stamp
            entry['good'] = good
            entry['missing'] = missing
            entry['invalid'] = invalid
            entry['late'] = late
            entry['nvalid'] = nvalid
            blockList['%i-%s' % (pid, name)] = entry
            
    return blockList
def _dot_node_id(index):
    """
    Return a purely alphabetic DOT node ID for a zero-based index:
    0 -> 'a', 25 -> 'z', 26 -> 'aa', 27 -> 'ab', and so on.
    """
    
    label = ''
    index += 1
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        label = chr(97 + remainder) + label
    return label


def main(args):
    """
    Print a DOT (Graphviz) digraph describing the data flow between the blocks
    of the process whose PID is `args.pid`.

    Attributes of `args` that are used:
     * pid - only the stats directory matching this PID is processed
     * source_name - label prefix for the synthetic network source nodes that
       are added in front of udp* source blocks reporting an 'nsrc' size
     * no_associations - if true, associations are neither drawn nor counted
       when deciding whether a block is used

    One graph is written to standard output per matching directory.  Nothing
    is returned.
    """
    
    pidDirs = sorted(glob.glob(os.path.join(BIFROST_STATS_BASE_DIR, '*')))
    
    for pidDir in pidDirs:
        pid = int(os.path.basename(pidDir), 10)
        if pid != args.pid:
            continue
            
        contents = load_by_pid(pid)
        details = get_process_details(pid)
        cmd = get_command_line(pid)
        
        if cmd == '' and details['user'] == '':
            continue
            
        # Assign a unique ID to each block.  The IDs are used unquoted in the
        # DOT output so they must stay alphabetic; chr(i + 97) alone runs into
        # '{', '|', '}', ... once there are more than 26 nodes.
        lut = {}
        for i, block in enumerate(contents.keys()):
            lut[block] = _dot_node_id(i)
            
        # Find chains of linked blocks
        sources, sinks, chains, associations = get_data_flows(contents)
        
        # Add in network sources, if needed
        i = len(contents.keys())
        for block in sources:
            if not block.startswith('udp'):
                continue
            try:
                nsrc = contents[block]['sizes']['nsrc']
            except KeyError:
                nsrc = None
            if nsrc is not None:
                name = '%s\\nx%i' % (args.source_name, nsrc)
                lut[name] = _dot_node_id(i)
                i += 1
                chains.append({'link': (name, block), 'dtype': 'UDP'})
                
        # Trim the command line down to the name of the script/executable.
        # An empty command line or a bare interpreter name has nothing to
        # strip, so guard the indexing.
        words = cmd.split()
        if cmd.startswith('python') and len(words) > 1:
            words = words[1:]
        cmd = os.path.basename(words[0]) if words else ''
        
        # Work out which nodes are referenced by a chain or, unless they are
        # disabled, by an association
        used = set()
        for chain in chains:
            used.update(chain['link'])
        if not args.no_associations:
            for assoc0, assoc1 in associations:
                used.add(assoc0)
                used.add(assoc1)
                
        # Create the DOT output
        print("digraph graph%i {" % pid)
        
        ## Graph label
        print(' labelloc="t"')
        print(' label="Pipeline: %s\\n "' % cmd)
        
        ## Block identifiers
        for block in sorted(lut):
            ### Is the block actually used?
            if block not in used:
                continue
                
            ### Yes, add it to the graph with the correct label
            ## CPU info - if available.  The synthetic network sources are not
            ## in `contents` and get no CPU line.
            if not block.startswith('%s\\nx' % args.source_name):
                try:
                    cpu = contents[block]['bind']['core0']
                    cpu = '\\nCPU%i' % cpu
                except KeyError:
                    cpu = "\\nUnbound"
            else:
                cpu = ''
                
            ## Shape - based on function (source vs. sink vs. connection)
            shape = 'box'
            if block in sources:
                shape = 'ellipse'
            if block in sinks:
                shape = 'diamond'
                
            ## Add it to the list
            print(' %s [label="%s%s" shape="%s"]' % (lut[block], block, cpu, shape))
            
        ## Chains
        for chain in chains:
            ### Extract the data type, if known
            dtype = chain['dtype']
            dtype = '' if dtype is None else ' %s' % dtype
            
            ### Add it to the list
            src, dst = chain['link'][0], chain['link'][1]
            print(' %s -> %s [label="%s"]' % (lut[src], lut[dst], dtype))
            
        ## Associations
        if not args.no_associations:
            for assoc0, assoc1 in associations:
                print(' %s -> %s [style="dotted" dir="both"]' % (lut[assoc0], lut[assoc1]))
                
        print("}")
def main(args):
    """
    Run an interactive, top-like curses display of the per-block 'bind' and
    'perf' logs of all processes found under BIFROST_STATS_BASE_DIR.

    Keys: 'q' quits; 'i', 'b', 'c', 't', 'a', 'p', and 'r' sort on PID, block
    name, core, total, acquire, process, and reserve time, respectively.
    Pressing the key of the current sort column reverses the order.  A changed
    sort order takes effect at the next poll.

    On exit the terminal is restored.  If the display loop failed, the error
    and its traceback are printed; otherwise the last window contents (without
    attributes) are printed.  `args` is not used.
    """
    
    hostname = socket.gethostname()
    
    scr = curses.initscr()
    curses.noecho()
    curses.cbreak()
    scr.keypad(1)
    scr.nodelay(1)
    size = scr.getmaxyx()
    
    std = curses.A_NORMAL
    rev = curses.A_REVERSE
    
    # Key press -> column to sort on
    sort_keys = {ord('i'): 'pid',
                 ord('b'): 'name',
                 ord('c'): 'core',
                 ord('t'): 'total',
                 ord('a'): 'acquire',
                 ord('p'): 'process',
                 ord('r'): 'reserve'}
    
    poll_interval = 1.0
    tLastPoll = 0.0
    sort_key = 'process'
    sort_rev = True
    display_gpu = False
    
    # The name bound by "except ... as" is removed when the except clause
    # ends, so keep what the final report needs in variables of our own.
    failure = None
    failure_line = 0
    tbString = ''
    
    try:
        while True:
            t = time.time()
            
            ## Interact with the user
            key = scr.getch()
            curses.flushinp()
            if key == ord('q'):
                break
            new_key = sort_keys.get(key)
            if new_key is not None:
                if sort_key == new_key:
                    sort_rev = not sort_rev
                else:
                    sort_key = new_key
                    sort_rev = True
                    
            ## Do we need to poll the system again?
            if t - tLastPoll > poll_interval:
                ## Load in the various bits from /proc that we need
                load = get_load_average()
                cpu = get_processor_usage()
                mem = get_memory_swap_usage()
                gpu = get_gpu_memory_usage()
                
                ## Determine if we have GPU data to display
                if gpu['devCount'] > 0:
                    display_gpu = True
                    
                ## Find all running processes
                pidDirs = sorted(glob.glob(os.path.join(BIFROST_STATS_BASE_DIR, '*')))
                
                ## Load the data
                blockList = {}
                for pidDir in pidDirs:
                    pid = int(os.path.basename(pidDir), 10)
                    contents = load_by_pid(pid)
                    
                    cmd = get_command_line(pid)
                    if cmd == '':
                        continue
                        
                    for block in contents.keys():
                        ## Blocks without a core binding are not listed
                        try:
                            cr = contents[block]['bind']['core0']
                        except KeyError:
                            continue
                            
                        ## Negative times are clipped to zero
                        try:
                            log = contents[block]['perf']
                            ac = max([0.0, log['acquire_time']])
                            pr = max([0.0, log['process_time']])
                            rs = max([0.0, log['reserve_time']])
                        except KeyError:
                            ac, pr, rs = 0.0, 0.0, 0.0
                            
                        blockList['%i-%s' % (pid, block)] = {
                            'pid': pid, 'name': block, 'cmd': cmd, 'core': cr,
                            'acquire': ac, 'process': pr, 'reserve': rs,
                            'total': ac + pr + rs}
                        
                ## Sort
                order = sorted(blockList, key=lambda x: blockList[x][sort_key], reverse=sort_rev)
                
                ## Mark
                tLastPoll = time.time()
                
            ## Display
            k = 0
            ### General - load average
            output = '%s - %s - load average: %s, %s, %s\n' % (
                os.path.basename(__file__), hostname,
                load['1min'], load['5min'], load['10min'])
            k = _add_line(scr, k, 0, output, std)
            ### General - process counts
            output = 'Processes: %s total, %s running\n' % (
                load['procTotal'], load['procRunning'])
            k = _add_line(scr, k, 0, output, std)
            ### General - average processor usage
            avg = cpu['avg']
            output = 'CPU(s):%5.1f%%us,%5.1f%%sy,%5.1f%%ni,%5.1f%%id,%5.1f%%wa,%5.1f%%hi,%5.1f%%si,%5.1f%%st\n' % (
                100.0 * avg['user'], 100.0 * avg['sys'], 100.0 * avg['nice'],
                100.0 * avg['idle'], 100.0 * avg['wait'], 100.0 * avg['irq'],
                100.0 * avg['sirq'], 100.0 * avg['steal'])
            k = _add_line(scr, k, 0, output, std)
            ### General - memory
            output = 'Mem: %9ik total, %9ik used, %9ik free, %9ik buffers\n' % (
                mem['memTotal'], mem['memUsed'], mem['memFree'], mem['buffers'])
            k = _add_line(scr, k, 0, output, std)
            ### General - swap
            output = 'Swap: %9ik total, %9ik used, %9ik free, %9ik cached\n' % (
                mem['swapTotal'], mem['swapUsed'], mem['swapFree'], mem['cached'])
            k = _add_line(scr, k, 0, output, std)
            ### General - GPU, if available
            if display_gpu:
                if gpu['pwrLimit'] != 0.0:
                    if gpu['load'] != 0.0:
                        output = 'GPU(s): %9ik total, %9ik used, %9ik free, %5.1f%%us, %.0f/%.0fW\n' % (
                            gpu['memTotal'], gpu['memUsed'], gpu['memFree'],
                            gpu['load'], gpu['pwrDraw'], gpu['pwrLimit'])
                    else:
                        output = 'GPU(s): %9ik total, %9ik used, %9ik free, %.0f/%.0fW\n' % (
                            gpu['memTotal'], gpu['memUsed'], gpu['memFree'],
                            gpu['pwrDraw'], gpu['pwrLimit'])
                else:
                    output = 'GPU(s): %9ik total, %9ik used, %9ik free, %i device(s)\n' % (
                        gpu['memTotal'], gpu['memUsed'], gpu['memFree'], gpu['devCount'])
                k = _add_line(scr, k, 0, output, std)
            ### Header
            k = _add_line(scr, k, 0, ' ', std)
            output = '%6s %15s %4s %5s %7s %7s %7s %7s Cmd' % (
                'PID', 'Block', 'Core', '%CPU', 'Total', 'Acquire', 'Process', 'Reserve')
            csize = size[1] - len(output)
            output += ' ' * csize
            output += '\n'
            k = _add_line(scr, k, 0, output, rev)
            ### Data
            ### A window narrower than the header makes csize negative; clamp
            ### the width so the slice does not count from the end of the
            ### command line instead
            cmd_width = max(csize + 3, 0)
            for o in order:
                d = blockList[o]
                try:
                    usage = '%5.1f' % (100.0 * cpu[d['core']]['total'])
                except KeyError:
                    usage = '%5s' % ' '
                output = '%6i %15s %4i %5s %7.3f %7.3f %7.3f %7.3f %s' % (
                    d['pid'], d['name'][:15], d['core'], usage,
                    d['total'], d['acquire'], d['process'], d['reserve'],
                    d['cmd'][:cmd_width])
                k = _add_line(scr, k, 0, output, std)
                if k >= size[0] - 1:
                    break
            ### Clear to the bottom
            scr.clrtobot()
            ### Refresh
            scr.refresh()
            
            ## Sleep
            time.sleep(_REDRAW_INTERVAL_SEC)
            
    except KeyboardInterrupt:
        pass
        
    except Exception as error:
        failure = error
        exc_traceback = sys.exc_info()[2]
        failure_line = exc_traceback.tb_lineno
        tbString = ''.join(traceback.format_tb(exc_traceback))
        
    contents = ''
    try:
        # Save the window contents - characters only, attributes are dropped
        y, x = scr.getmaxyx()
        contents = ''.join([chr(scr.inch(i, j) & 0xFF)
                            for i in range(y - 1) for j in range(x)])
    finally:
        # Tear down curses, even if reading the window back failed
        scr.keypad(0)
        curses.echo()
        curses.nocbreak()
        curses.endwin()
        
    # Final reporting
    if failure is not None:
        ## Error
        print("%s: failed with %s at line %i" % (os.path.basename(__file__), str(failure), failure_line))
        for line in tbString.split('\n'):
            print(line)
    else:
        ## Last window contents sans attributes
        print(contents)
def _parse_numa_maps_entry(line, pageSize, hugeSize):
    """
    Parse one line of /proc/<pid>/numa_maps into an (address, info) pair.

    The line is split on whitespace: the first field is taken as the address
    and the remaining 'key=<integer>' fields are read individually (assumes
    the layout described in numa(7) - TODO confirm against the target kernel).
    The page count comes from 'mapped=' if present, otherwise from 'anon='.
    The node is the last 'N<node>=' item on the line.  Page counts are
    converted to bytes with `hugeSize` if the line contains 'huge' and with
    `pageSize` otherwise.

    Returns None if the line has no page count or no node item.
    """
    
    fields = line.split()
    if not fields:
        return None
        
    counts = {}
    node = None
    for field in fields[1:]:
        key, sep, value = field.partition('=')
        if not sep or not value.isdigit():
            ## Flags (heap, stack, huge, ...), the policy, and file=<path>
            continue
        if key[:1] == 'N' and key[1:].isdigit():
            node = int(key[1:], 10)
        else:
            counts[key] = int(value, 10)
            
    if 'mapped' in counts:
        npage = counts['mapped']
    elif 'anon' in counts:
        npage = counts['anon']
    else:
        return None
    if node is None:
        return None
        
    huge = 'huge' in line
    unit = hugeSize if huge else pageSize
    info = {'size': npage * unit,
            'node': node,
            'huge': huge,
            'heap': 'heap' in line,
            'stack': 'stack' in line,
            'shared': 'mapmax=' in line,
            'swapped': 'swapcache' in counts,
            'swapsize': counts.get('swapcache', 0) * unit}
    return fields[0], info


def _print_numa_area_summary(title, table):
    """
    Print the attribute counts and the per-NUMA node count and size of the
    memory areas in `table` under the heading `title`.
    """
    
    nodeCounts = {}
    nodeSizes = {}
    for addr in table:
        node = table[addr]['node']
        nodeCounts[node] = nodeCounts.get(node, 0) + 1
        nodeSizes[node] = nodeSizes.get(node, 0) + table[addr]['size']
        
    print(title)
    print(" Total: %i" % len(table))
    print(" Heap: %i" % len([addr for addr in table if table[addr]['heap']]))
    print(" Stack: %i" % len([addr for addr in table if table[addr]['stack']]))
    print(" Shared: %i" % len([addr for addr in table if table[addr]['shared']]))
    print(" Swapped: %i" % len([addr for addr in table if table[addr]['swapped']]))
    for node in sorted(nodeCounts.keys()):
        print(" NUMA Node %i:" % node)
        print(" Count: %i" % nodeCounts[node])
        print(" Size: %.3f %s" % get_best_size(nodeSizes[node]))


def main(args):
    """
    Report how the memory areas of the process `args.pid`, and the bifrost
    rings matched to them, are bound to NUMA nodes.

    The ring information comes from load_by_pid(args.pid, include_rings=True)
    and the memory areas from /proc/<pid>/numa_maps.  Each ring is matched to
    the anonymous memory area whose size is closest to the ring's 'stride'.
    The report is written to standard output.

    Raises RuntimeError if no ring information is found or if the numa_maps
    file cannot be read.
    """
    
    # Find out the kernel page size, both regular and huge
    ## Regular
    pageSize = subprocess.check_output(['getconf', 'PAGESIZE'])
    pageSize = int(pageSize, 10)
    ## Huge - assumed that the value is in kB
    hugeSize = subprocess.check_output(['grep', 'Hugepagesize', '/proc/meminfo'])
    hugeSize = int(hugeSize.split()[1], 10) * 1024
    
    # Load in the bifrost ring information for this process
    contents = load_by_pid(args.pid, include_rings=True)
    
    rings = {}
    if 'rings' in contents:
        ring_logs = contents['rings']
        for ring in ring_logs.keys():
            rings[ring] = {key: ring_logs[ring][key] for key in ring_logs[ring]}
    if not rings:
        raise RuntimeError("Cannot find bifrost ring info for PID: %i" % args.pid)
        
    # Load in the NUMA map page for this process
    try:
        with open('/proc/%i/numa_maps' % args.pid, 'r') as fh:
            numaInfo = fh.read()
    except IOError:
        raise RuntimeError("Cannot find NUMA memory info for PID: %i" % args.pid)
        
    # Parse out the file backed and anonymous entries in this file
    areas = {}
    files = {}
    for line in numaInfo.split('\n'):
        ## Skip over blank lines and anything that is neither file backed
        ## nor anonymous
        if len(line) < 3:
            continue
        if line.find('file=') != -1:
            table = files
        elif line.find('anon=') != -1:
            table = areas
        else:
            continue
            
        ## Get the address, size, and binding information
        entry = _parse_numa_maps_entry(line, pageSize, hugeSize)
        if entry is not None:
            addr, info = entry
            table[addr] = info
            
    # Try to match the rings to the memory areas
    matched = []
    for ring in rings:
        stride = rings[ring]['stride']
        
        best = None
        metric = 1e13
        for addr in areas:
            diff = abs(areas[addr]['size'] - stride)
            if diff < metric:
                best = addr
                metric = diff
        ## `best` stays None if there is no area to match; the report below
        ## then lists the ring as unknown
        rings[ring]['addr'] = best
        matched.append(best)
        
    # Final report
    print("Rings: %i" % len(rings))
    _print_numa_area_summary("File Backed Memory Areas:", files)
    _print_numa_area_summary("Anonymous Memory Areas:", areas)
    print(" ")
    
    print("Ring Mappings:")
    for ring in sorted(rings):
        print(" %s" % ring)
        try:
            area = areas[rings[ring]['addr']]
        except KeyError:
            print(" Unknown")
            continue
            
        sv, su = get_best_size(area['size'])
        diff = abs(area['size'] - rings[ring]['stride'])
        ## Flag matches that are off by more than half a huge page
        status = '???' if diff > 0.5*hugeSize else ''
        dv, du = get_best_size(diff)
        sf = float(area['swapsize'])/float(area['size']) if area['size'] else 0.0
        
        print(" Size: %.3f %s" % get_best_size(rings[ring]['stride']))
        print(" Area: %s %s" % (rings[ring]['addr'], status))
        print(" Size: %.3f %s%s" % (sv, su, ' (within %.3f %s)' % (dv, du) if diff != 0 else ''))
        print(" Node: %i" % area['node'])
        print(" Attributes:")
        print(" Huge? %s" % area['huge'])
        print(" Heap? %s" % area['heap'])
        print(" Stack? %s" % area['stack'])
        print(" Shared? %s" % area['shared'])
        print(" Swap Status:")
        print(" Swapped? %s" % area['swapped'])
        if area['swapped']:
            print(" Swap Fraction: %.1f%%" % (100.0*sf,))
    print(" ")
    
    print("Other Non-Ring Areas:")
    print(" Size: %.3f %s" % get_best_size(sum([areas[area]['size'] for area in areas if area not in matched])))
    print(" ")
    
    print("File Backed Areas:")
    print(" Size: %.3f %s" % get_best_size(sum([files[area]['size'] for area in files])))
def main(args):
    """
    Print a summary of every process found under BIFROST_STATS_BASE_DIR:
    the command line, the process details (user, CPU and memory usage, elapsed
    time, thread count), the rings referenced by the 'in' and 'out' logs of
    its blocks, and for each block the rings it reads and writes along with
    the names of its logs.

    Processes with neither a command line nor a user are skipped.  `args` is
    not used.
    """
    
    for pid_path in sorted(glob.glob(os.path.join(BIFROST_STATS_BASE_DIR, '*'))):
        pid = int(os.path.basename(pid_path), 10)
        
        contents = load_by_pid(pid)
        details = get_process_details(pid)
        cmd = get_command_line(pid)
        if cmd == '' and details['user'] == '':
            continue
            
        # Process overview
        print("PID: %i" % pid)
        print(" Command: %s" % cmd)
        print(" User: %s" % details['user'])
        print(" CPU Usage: %.1f%%" % details['cpu'])
        print(" Memory Usage: %.1f%%" % details['mem'])
        print(" Elapsed Time: %s" % details['etime'])
        print(" Thread Count: %i" % details['threads'])
        
        # Rings - gathered in the order they are first encountered
        print(" Rings:")
        rings = []
        ring_details = {}
        for name, logs in contents.items():
            if name == 'rings':
                ## The 'rings' entry holds the per-ring details
                for ring, info in logs.items():
                    ring_details[ring] = {key: info[key] for key in info}
                continue
                
            for direction, entries in logs.items():
                if direction not in ('in', 'out'):
                    continue
                for key, value in entries.items():
                    if key.startswith('ring') and value not in rings:
                        rings.append(value)
                        
        for idx, ring in enumerate(rings):
            try:
                info = ring_details[ring]
                amount, unit = get_best_size(info['stride'] * info['nringlet'])
                print(" %i: %s on %s of size %.1f %s" % (idx, ring, info['space'], amount, unit))
            except KeyError:
                ## No (or incomplete) details for this ring
                print(" %i: %s" % (idx, ring))
                
        # Blocks
        print(" Blocks:")
        for name, logs in contents.items():
            if name == 'rings':
                continue
                
            ## Rings read ('in') and written ('out') by this block
            used = {'in': [], 'out': []}
            for direction, entries in logs.items():
                if direction not in used:
                    continue
                for key, value in entries.items():
                    if key.startswith('ring') and value not in used[direction]:
                        used[direction].append(value)
                        
            print(" %s" % name)
            if used['in']:
                numbers = ["%i" % rings.index(v) for v in used['in']]
                print(" -> read ring(s): %s" % (" ".join(numbers), ))
            if used['out']:
                numbers = ["%i" % rings.index(v) for v in used['out']]
                print(" -> write ring(s): %s" % (" ".join(numbers), ))
            if len(logs.keys()) > 0:
                print(" -> log(s): %s" % (" ".join(logs.keys()), ))
def _update(self):
    """
    Load the current state for self._pid with load_by_pid() and append it to
    self._state as a (timestamp, state) pair.  The timestamp is the
    time.time() value taken right after the load has finished.
    """
    
    snapshot = load_by_pid(self._pid)
    self._state.append((time.time(), snapshot))
def main(args):
    """
    Run an interactive, top-like curses display of the per-block 'bind' and
    'perf' logs of all processes found under BIFROST_STATS_BASE_DIR, sorted
    by decreasing process time.

    `args` is handed to parseOptions().  Pressing 'q' or Ctrl-C ends the
    display.  The terminal is restored on the way out, including when the
    display loop fails with an exception; such an exception is passed on to
    the caller.
    """
    
    # The returned configuration is not used by this function; the call is
    # kept in case it validates the arguments - TODO confirm
    parseOptions(args)
    
    hostname = socket.gethostname()
    
    scr = curses.initscr()
    try:
        curses.noecho()
        curses.cbreak()
        scr.keypad(1)
        scr.nodelay(1)
        size = scr.getmaxyx()
        
        std = curses.A_NORMAL
        rev = curses.A_REVERSE
        
        poll_interval = 1.0
        tLastPoll = 0.0
        
        while True:
            t = time.time()
            
            ## Interact with the user
            key = scr.getch()
            curses.flushinp()
            if key == ord('q'):
                break
                
            ## Do we need to poll the system again?
            if t-tLastPoll > poll_interval:
                ## Load in the various bits from /proc that we need
                load = _getLoadAverage()
                cpu = _getProcessorUsage()
                mem = _getMemoryAndSwapUsage()
                
                ## Find all running processes
                pidDirs = sorted(glob.glob(os.path.join(BIFROST_STATS_BASE_DIR, '*')))
                
                ## Load the data
                blockList = {}
                for pidDir in pidDirs:
                    pid = int(os.path.basename(pidDir), 10)
                    contents = load_by_pid(pid)
                    
                    cmd = _getCommandLine(pid)
                    if cmd == '':
                        continue
                        
                    for block in contents.keys():
                        ## Blocks without a core binding are not listed
                        try:
                            cr = contents[block]['bind']['core0']
                        except KeyError:
                            continue
                            
                        ## Negative times are clipped to zero
                        try:
                            log = contents[block]['perf']
                            ac = max([0.0, log['acquire_time']])
                            pr = max([0.0, log['process_time']])
                            rs = max([0.0, log['reserve_time']])
                        except KeyError:
                            ac, pr, rs = 0.0, 0.0, 0.0
                            
                        blockList['%i-%s' % (pid, block)] = {'pid': pid, 'name':block, 'cmd': cmd, 'core': cr, 'acquire': ac, 'process': pr, 'reserve': rs}
                        
                ## Sort
                order = sorted(blockList, key=lambda x: blockList[x]['process'], reverse=True)
                
                ## Mark
                tLastPoll = time.time()
                
            ## Display
            k = 0
            ### General - load average
            output = '%s - %s - load average: %s, %s, %s\n' % (os.path.basename(__file__), hostname, load['1min'], load['5min'], load['10min'])
            k = _addLine(scr, k, 0, output, std)
            ### General - process counts
            output = 'Processes: %s total, %s running\n' % (load['procTotal'], load['procRunning'])
            k = _addLine(scr, k, 0, output, std)
            ### General - average processor usage
            avg = cpu['avg']
            output = 'CPU(s):%5.1f%%us,%5.1f%%sy,%5.1f%%ni,%5.1f%%id,%5.1f%%wa,%5.1f%%hi,%5.1f%%si,%5.1f%%st\n' % (100.0*avg['user'], 100.0*avg['sys'], 100.0*avg['nice'], 100.0*avg['idle'], 100.0*avg['wait'], 100.0*avg['irq'], 100.0*avg['sirq'], 100.0*avg['steal'])
            k = _addLine(scr, k, 0, output, std)
            ### General - memory
            output = 'Mem: %9ik total, %9ik used, %9ik free, %9ik buffers\n' % (mem['memTotal'], mem['memUsed'], mem['memFree'], mem['buffers'])
            k = _addLine(scr, k, 0, output, std)
            ### General - swap
            output = 'Swap: %9ik total, %9ik used, %9ik free, %9ik cached\n' % (mem['swapTotal'], mem['swapUsed'], mem['swapFree'], mem['cached'])
            k = _addLine(scr, k, 0, output, std)
            ### Header
            k = _addLine(scr, k, 0, ' ', std)
            output = '%6s %15s %4s %5s %7s %7s %7s %7s Cmd' % ('PID', 'Block', 'Core', '%CPU', 'Total', 'Acquire', 'Process', 'Reserve')
            csize = size[1]-len(output)
            output += ' '*csize
            output += '\n'
            k = _addLine(scr, k, 0, output, rev)
            ### Data
            ### A window narrower than the header makes csize negative; clamp
            ### the width so the slice does not count from the end of the
            ### command line instead
            cmd_width = max(csize+3, 0)
            for o in order:
                d = blockList[o]
                try:
                    usage = '%5.1f' % (100.0*cpu[d['core']]['total'])
                except KeyError:
                    usage = '%5s' % ' '
                output = '%6i %15s %4i %5s %7.3f %7.3f %7.3f %7.3f %s' % (d['pid'], d['name'][:15], d['core'], usage, d['acquire']+d['process']+d['reserve'], d['acquire'], d['process'], d['reserve'], d['cmd'][:cmd_width])
                k = _addLine(scr, k, 0, output, std)
                if k >= size[0] - 1:
                    break
            ### Clear to the bottom
            scr.clrtobot()
            ### Refresh
            scr.refresh()
            
            ## Sleep
            time.sleep(_REDRAW_INTERVAL_SEC)
            
    except KeyboardInterrupt:
        pass
        
    finally:
        # Always hand the terminal back in a usable state, also when the loop
        # above dies with an unexpected exception
        curses.nocbreak()
        scr.keypad(0)
        curses.echo()
        curses.endwin()