def io_sizes(intervals, read=True):
    """Render a log-scale histogram of I/O request sizes.

    Args:
        intervals: mapping of filename -> iterable of interval records.
            Index 4 of a record is used as the I/O size and index 5 as the
            "is read" flag.
        read: when true, count read intervals and store the result in
            htmlWriter.readIOSizes; otherwise count write intervals and
            store it in htmlWriter.writeIOSizes.

    Files for which ignore_files(filename) is true are skipped.
    """
    # Number of matching accesses per distinct I/O size.
    histogram = {}
    for filename in intervals:
        if ignore_files(filename):
            continue
        for record in intervals[filename]:
            size, is_read = record[4], record[5]
            if read == is_read:
                histogram[size] = histogram.get(size, 0) + 1

    ordered_sizes = sorted(histogram)
    ys = [histogram[size] for size in ordered_sizes]
    # The x axis is categorical, so the sizes are passed as strings.
    xs = [str(size) for size in ordered_sizes]

    width = 400 if read else 500
    p = figure(x_range=xs, x_axis_label="IO Size", y_axis_label="Count",
               y_axis_type='log', plot_width=width, plot_height=350)
    p.vbar(x=xs, top=ys, width=0.6, bottom=1)
    p.xaxis.major_label_orientation = math.pi/2

    # Print each bar's count next to it.
    label_source = ColumnDataSource(dict(x=xs, y=ys))
    labels = LabelSet(x='x', y='y', text='y', level='glyph',
                      x_offset=-10, y_offset=0, text_font_size="10pt",
                      source=label_source, render_mode='canvas')
    p.add_layout(labels)

    script, div = components(p)
    markup = div + script
    if read:
        htmlWriter.readIOSizes = markup
    else:
        htmlWriter.writeIOSizes = markup
def file_counts():
    """Render a bar chart of the number of non-ignored files per rank.

    For every entry of reader.LMs, counts the first LM.num_files names in
    LM.filenames that ignore_files() does not reject. The counts are
    plotted against range(reader.GM.total_ranks) and the resulting markup
    is stored in htmlWriter.fileCount.
    """
    per_rank_counts = []
    for local_meta in reader.LMs:
        visible = sum(
            1
            for idx in range(local_meta.num_files)
            if not ignore_files(local_meta.filenames[idx])
        )
        per_rank_counts.append(visible)

    ranks = range(reader.GM.total_ranks)
    plot = figure(x_axis_label="Rank",
                  y_axis_label="Number of files accessed",
                  plot_width=400, plot_height=300)
    plot.vbar(x=ranks, top=per_rank_counts, width=0.6)

    script, div = components(plot)
    htmlWriter.fileCount = div+script
def file_counts():
    """Render a bar chart of the number of non-ignored files per rank.

    For every entry of reader.localMetadata, counts the fileMap entries
    whose filename (index 2 of the entry) is not rejected by
    ignore_files(). The counts are plotted against
    range(reader.globalMetadata.numRanks) and the resulting markup is
    stored in htmlWriter.fileCount.
    """
    per_rank_counts = []
    for local_meta in reader.localMetadata:
        visible = sum(
            1
            for file_info in local_meta.fileMap
            if not ignore_files(file_info[2])
        )
        per_rank_counts.append(visible)

    ranks = range(reader.globalMetadata.numRanks)
    plot = figure(x_axis_label="Rank",
                  y_axis_label="Number of files accessed",
                  plot_width=400, plot_height=300)
    plot.vbar(x=ranks, top=per_rank_counts, width=0.6)

    script, div = components(plot)
    htmlWriter.fileCount = div + script
def function_patterns(all_intervals):
    """Classify successive accesses to each file and render a pie chart.

    For every non-ignored file, the intervals are ordered by start time
    (index 1) and each interval is compared with the next one:

      * consecutive - the next access starts exactly where the previous
        one ended (offsets like 1, 2, 3)
      * sequential  - the next access starts beyond the end of the
        previous one (offsets like 1, 3, 9)
      * random      - anything else (offsets like 1, 3, 2)

    Args:
        all_intervals: mapping of filename -> list of interval records.
            Index 3 of a record is used as the offset and index 4 as the
            byte count.

    Side effects: prints the three totals and stores the pie chart markup
    in htmlWriter.functionPatterns.
    """
    counts = {'consecutive': 0, 'sequential': 0, 'random': 0}

    for filename in all_intervals.keys():
        if ignore_files(filename):
            continue

        # All ranks are merged into a single time-ordered stream; accesses
        # are not tracked separately per rank.
        ordered = sorted(all_intervals[filename], key=lambda rec: rec[1])

        for previous, following in zip(ordered, ordered[1:]):
            previous_end = previous[3] + previous[4]
            next_offset = following[3]
            if previous_end == next_offset:
                counts['consecutive'] += 1
            elif previous_end < next_offset:
                counts['sequential'] += 1
            else:
                counts['random'] += 1

    print("consecutive:", counts['consecutive'] )
    print("sequential:", counts['sequential'] )
    print("random:", counts['random'])

    script, div = components(pie_chart(counts))
    htmlWriter.functionPatterns = script+div
def offset_vs_time(intervals, max_files=16):
    """Plot the accessed offset range over time, one sub-plot per file.

    Args:
        intervals: mapping of filename -> list of interval records. A
            record is indexed as
            [rank, tstart, tend, offset, count, isRead, ...].
        max_files: maximum number of files to plot. Defaults to 16, the
            limit that was previously hard-coded.

    Files rejected by ignore_files(), files with no intervals, and
    'junk.<n>' / 'pout.<n>' files with n > 0 are skipped.

    Side effects: stores the grid's markup in htmlWriter.offsetVsTime.
    """
    from bokeh.layouts import gridplot

    def has_positive_numeric_suffix(filename, stem):
        """Return True if filename ends in '<stem>.<n>' with integer n > 0.

        A suffix that is not an integer means the file is not one of the
        numbered outputs, so it is not filtered (previously this raised
        ValueError and aborted the whole report).
        """
        if stem not in filename:
            return False
        try:
            return int(filename.split(stem + '.')[-1]) > 0
        except ValueError:
            return False

    def plot_for_one_file(filename, file_intervals):
        """Build one figure with a read line and a write line."""
        nan = float('nan')
        x_read, y_read, x_write, y_write = [], [], [], []

        # Draw the intervals in start-time order.
        for record in sorted(file_intervals, key=lambda rec: rec[1]):
            tstart, tend, offset, count = record[1], record[2], record[3], record[4]
            if record[5]:
                xs, ys = x_read, y_read
            else:
                xs, ys = x_write, y_write
            # Each interval is a segment from (tstart, offset) to
            # (tend, offset + count); the nan x-value is a separator
            # between segments (presumably so the line is not joined
            # across intervals - confirm against Bokeh's NaN handling).
            xs.extend([tstart, tend, nan])
            ys.extend([offset, offset + count, offset + count])

        # Drop the trailing separator point of every series.
        x_read, y_read = x_read[:-1], y_read[:-1]
        x_write, y_write = x_write[:-1], y_write[:-1]

        p = figure(title=filename.split("/")[-1], x_axis_label="Time",
                   y_axis_label="Offset")
        p.line(x_read, y_read, line_color='blue', line_width=2, alpha=1.0,
               legend_label="read")
        p.line(x_write, y_write, line_color='red', line_width=2, alpha=1.0,
               legend_label="write")
        return p

    plots = []
    for filename in intervals:
        if len(plots) >= max_files:
            break  # limit reached; nothing further would be plotted
        if ignore_files(filename):
            continue
        if has_positive_numeric_suffix(filename, 'junk'):  # NWChem
            continue
        if has_positive_numeric_suffix(filename, 'pout'):  # Chombo
            continue
        if len(intervals[filename]) > 0:
            plots.append(plot_for_one_file(filename, intervals[filename]))

    script, div = components(gridplot(plots, ncols=3, plot_width=400,
                                      plot_height=300))
    htmlWriter.offsetVsTime = script+div
def file_access_mode():
    """Build the per-file table of size, open flags and read/write usage.

    Walks every record of every rank in reader.records. Calls whose name
    contains "open" contribute an open-flag string for the file; calls
    whose name contains "read", "write" or "fprintf" mark the file as read
    and/or written. Calls whose name contains "dir", "MPI" or "H5" are
    skipped. The table's HTML is stored in htmlWriter.fileAccessModeTable.
    """
    # Bit value of each open(2) flag. Insertion order fixes the order in
    # which names appear in the generated flag string.
    flag_bits = {
        "O_RDONLY": 0x000,
        "O_WRONLY": 0x0001,
        "O_RDWR": 0x0002,
        "O_NONBLOCK": 0x0004,
        "O_APPEND": 0x0008,
        "O_SHLOCK": 0x0010,
        "O_EXLOCK": 0x0020,
        "O_ASYNC": 0x0040,
        "O_FSYNC": 0x0080,
        "O_CREAT": 0x0200,
        "O_TRUNC": 0x0400,
        "O_EXCL": 0x0800,
    }

    # One entry per filename found in any rank's file map.
    flags_set, accesses_set, sizes_set = {}, {}, {}
    for local_meta in reader.localMetadata:
        for file_info in local_meta.fileMap:
            name = file_info[2]
            flags_set[name] = set()
            sizes_set[name] = file_info[1]
            accesses_set[name] = {'read': False, 'write': False}

    def get_flag_str(flag_int):
        """Return the set flag names joined by " | ", or "O_RDONLY" if none."""
        names = [flag for flag, bit in flag_bits.items() if flag_int & bit > 0]
        return " | ".join(names) if names else "O_RDONLY"

    for rank in range(reader.globalMetadata.numRanks):
        file_map = reader.localMetadata[rank].fileMap
        for record in reader.records[rank]:
            func = reader.globalMetadata.funcs[record[3]]
            if any(tag in func for tag in ("dir", "MPI", "H5")):
                continue

            if "open" in func:
                opened = file_map[int(record[4][0])][2]
                if func in ("open", "open64"):
                    # Integer flags: decode them into names.
                    flag_str = get_flag_str(int(record[4][1]))
                elif "fopen" in func or "fdopen" in func:
                    # The second argument is recorded as-is.
                    flag_str = record[4][1]
                else:
                    print("Not regonized: ", func)
                    flag_str = ""
                flags_set[opened].add(flag_str)

            is_read = "read" in func
            is_write = "write" in func or "fprintf" in func
            if is_read or is_write:
                # fread/fwrite take the file id from argument 3; all other
                # matching calls take it from argument 0.
                id_pos = 3 if ("fread" in func or "fwrite" in func) else 0
                accessed = file_map[int(record[4][id_pos])][2]
                if is_read:
                    accesses_set[accessed]["read"] = True
                if is_write:
                    accesses_set[accessed]["write"] = True

    table = PrettyTable()
    table.field_names = ['Filename', 'File Size', 'Open Flags', 'Read', 'Write']
    for name in flags_set:
        if ignore_files(name):
            continue
        table.add_row([
            name,
            sizes_set[name],
            list(flags_set[name]),
            accesses_set[name]['read'],
            accesses_set[name]['write'],
        ])
    htmlWriter.fileAccessModeTable = table.get_html_string()
def file_access_patterns(intervals):
    """Build the table of overlapping-access patterns for each file.

    For every non-ignored file the table reports whether a
    read-after-read (RAR), read-after-write (RAW), write-after-write (WAW)
    or write-after-read (WAR) conflict was seen, separately for accesses
    from the same rank and from different ranks.

    Args:
        intervals: mapping of filename -> list of interval records. The
            indices used are 0 (rank), 1 (tstart), 3 (offset), 4 (count),
            5 (isRead) and 6 (list of sessions).

    Side effects: prints every conflicting pair and stores the table's
    HTML in htmlWriter.fileAccessPatterns.
    """
    def pattern_for_one_file(filename, intervals):
        """Return {kind: {'S': bool, 'D': bool}} for a single file."""
        pattern = {
            kind: {'S': False, 'D': False}
            for kind in ("RAR", "RAW", "WAW", "WAR")
        }

        # Sort by starting offset so that overlapping ranges are adjacent.
        # NOTE(review): only neighbouring intervals are compared, so an
        # interval overlapping a non-adjacent one is not examined.
        intervals = sorted(intervals, key=lambda rec: rec[3])

        for i1, i2 in zip(intervals, intervals[1:]):
            tstart1, offset1, count1, sessions1 = i1[1], i1[3], i1[4], i1[6]
            tstart2, offset2, sessions2 = i2[1], i2[3], i2[6]

            # No overlap between the two byte ranges.
            if offset1 + count1 <= offset2:
                continue

            if len(sessions1) == 0 or len(sessions2) == 0:
                print("Without a session? ", filename, i1, i2)
                # Without sessions the check below cannot be evaluated;
                # indexing sessions[0] would raise IndexError, so skip.
                continue

            # Overlapping does not necessarily mean conflicting: the pair
            # conflicts only if the first session of one interval
            # (presumably its local session) is among the other's sessions.
            if not (sessions1[0] in sessions2 or sessions2[0] in sessions1):
                continue

            print(filename, i1, i2)

            # Order the pair by start time; on a tie i2 is treated as the
            # earlier one.
            first, second = (i1, i2) if tstart1 < tstart2 else (i2, i1)

            # Name the pattern "<second access> after <first access>".
            if first[5]:
                kind = "RAR" if second[5] else "WAR"
            else:
                kind = "RAW" if second[5] else "WAW"
            scope = 'S' if first[0] == second[0] else 'D'
            pattern[kind][scope] = True

        return pattern

    table = PrettyTable()
    table.field_names = [
        'Filename',
        'RAR(Same Rank)', 'RAW(Same Rank)', 'WAW(Same Rank)', 'WAR(Same Rank)',
        'RAR(Different Rank)', 'RAW(Different Rank)',
        'WAW(Different Rank)', 'WAR(Different Rank)',
    ]
    for filename in intervals.keys():
        if ignore_files(filename):
            continue
        pattern = pattern_for_one_file(filename, intervals[filename])
        table.add_row([
            filename,
            pattern['RAR']['S'], pattern['RAW']['S'],
            pattern['WAW']['S'], pattern['WAR']['S'],
            pattern['RAR']['D'], pattern['RAW']['D'],
            pattern['WAW']['D'], pattern['WAR']['D'],
        ])
    htmlWriter.fileAccessPatterns = table.get_html_string()