def get_blocks2files_map(img_file, directories, journal_inode='8'):
    """Map every listed file of a file-system image to its data blocks.

    input:
        img_file      -- image handed to the debugfs_* helpers
        directories   -- sub-directories whose listings are added to the
                         top-level listing returned by debugfs_ls
        journal_inode -- inode of the journal; defaults to '8', the value
                         the original code hard-coded ("typically 8")
    output:
        [
            ['inode', 'file_name', 'file_blocks', ...],
            ['inode', 'file_name', 'file_blocks', ...],
            ...
            [journal_inode, 'fs-journal', 'file_blocks', ...],
        ]
        Every 'file_blocks' item is one unmodified line of
        `debugfs blocks` output; the journal entry is always last.
    """
    entries = lines2lists(debugfs_ls(img_file))
    for directory in directories:
        # Build a new list each time so the list returned by lines2lists
        # is never mutated.
        entries = entries + lines2lists(debugfs_ls_dir(img_file, directory))

    ret_lists = []
    for fields in entries:
        # Only rows that split into exactly 9 fields are treated as file
        # entries; everything else in the listing is ignored.
        if len(fields) != 9:
            continue
        inode = fields[0]
        file_name = fields[-1]
        ret_lists.append(
            [inode, file_name] + _debugfs_block_lines(img_file, inode))

    # Add the journal blocks as a final pseudo-file entry.
    ret_lists.append(
        [journal_inode, 'fs-journal']
        + _debugfs_block_lines(img_file, journal_inode))
    return ret_lists


def _debugfs_block_lines(img_file, inode):
    """Return the lines of debugfs_blocks output that list block numbers.

    Example output of debugfs_blocks:
        debugfs 1.42 (29-Nov-2011)
        debugfs:  blocks <11>
        518 519 520 521 522 523 524 525 526 527 528 529
    Lines containing "debugfs" (banner and echoed command) are dropped;
    the remaining lines are returned exactly as read.
    """
    out_file = debugfs_blocks(img_file, inode)
    return [out_line for out_line in out_file.readlines()
            if "debugfs" not in out_line]
def get_io_list(io_file):
    """Return one "<IO number> <offset>" string per record of io_file.

    io_file is parsed with lines2lists; each resulting record looks like
        1392469373081108 IO# 1 1024 1024 000100000004...
    Field 2 is the IO number and field 3 the offset; they are joined
    with a single space and returned in input order.
    """
    return [fields[2] + " " + fields[3] for fields in lines2lists(io_file)]
#!/usr/bin/env python #map each splitted IO (default 4KB) to the original request (maybe > 4KB) #./THISFILE formatted_io IO_HEAD_LOG #./THISFILE formatted.io.txt pfe_io_header_log.work.od > io2req.txt import sys import re from format_strace import lines2lists if __name__ == "__main__": #for fn in sys.argv[1]: formatted_io_fn = sys.argv[1] formatted_io_f = open(formatted_io_fn) formatted_io_lines = lines2lists(formatted_io_f) req_fn = sys.argv[2] req_f = open(req_fn) req_lines = lines2lists(req_f) for io_line in formatted_io_lines: #1392469377972388 IO# 2 2099200 1024 0000000000000000002000000000000020202020000000000000000000000000 ts = io_line[0] # io_offset = io_line[3] for req_line in req_lines: if int(req_line[-1]) == 101010101010: #line format: "ADDR REQUEST# ts offset length datalog_offset cmd_type data_dir MARKER" #0000128 2 1392469377972388 2099200 1024 1024 42 1 101010101010 if ts == req_line[2]: req_num = req_line[1]
#!/usr/bin/env python
# Simplify an io2ReqAndFile map.
# Only keep the IO fields, REQ #, request offset, size and FILE name.
# Reads the map named by the first command-line argument and prints the
# simplified records to stdout.

import sys
import re

from format_strace import lines2lists


def _simplify(fields):
    """Return the shortened field list for one parsed mapping record.

    Input record (already split into fields), e.g.:
        1391577375582680 IO 4 50596864 1024 <data> REQ 4 OFFSET 50596864
        SIZE 1024 FILE journal
    Result:
        IO 4 50596864 1024 REQ 4 50596864 1024 FILE journal
    """
    io_info = fields[1:5]    # "IO 4 50596864 1024"
    req_info = fields[6:8]   # "REQ 4"
    req_offset = fields[9]   # value that follows the OFFSET label
    remain = fields[11:]     # "1024 FILE journal"
    return io_info + req_info + [req_offset] + remain


if __name__ == "__main__":
    # Close the input file as soon as it has been parsed.
    # NOTE(review): assumes lines2lists reads the whole file before
    # returning -- confirm against format_strace.
    with open(sys.argv[1]) as f:
        flines = lines2lists(f)

    ret_lines = [_simplify(line) for line in flines]

    for line in ret_lines:
        # Single-argument parenthesised print gives the same output on
        # Python 2 and Python 3.
        print(' '.join(line))
ret_lines.append(title_line) title_line = ['| absTS | ', 'relativeTS | ', 'Syscall | ', '...'] ret_lines.append(title_line) first_line = linelists[0] first_ts = int(first_line[0]) for line in linelists: new_line = line abs_ts = int(new_line[0]) relative_ts = str(abs_ts - first_ts) new_line.insert(1, relative_ts) ret_lines.append(new_line) return ret_lines if __name__ == "__main__": if len(sys.argv) < 3: print "No enough arguments! Usage: ./THISSCRIPT formatted_io_log formatted_strace_log" exit(0) f_io = open(sys.argv[1]) f_syscall = open(sys.argv[2]) syscall_lists = lines2lists(f_syscall) io_lists = lines2lists(f_io) usefullines = insert_io2syscall(syscall_lists, io_lists) usefullines = reformat_list(usefullines) for line in usefullines: line_str = ' '.join(line) print line_str
#!/usr/bin/env python #merge io2req map and io2file map #./THISFILE io2req.sim io2file.sim import sys import re from format_strace import lines2lists if __name__ == "__main__": #for fn in sys.argv[1]: io2req_fn = sys.argv[1] io2req_f = open(io2req_fn) io2req_lines = lines2lists(io2req_f) io2file_fn = sys.argv[2] io2file_f = open(io2file_fn) io2file_lines = lines2lists(io2file_f) #for i in len(io2req_lines): # io2file_line = io2file_lines[i] # if len(io2file_line) == 2: #offset, filename # filename = io2file_line[1] # append_str = " FILE= " + filename # io2req_line[i].append(append_str) io_num = 0 for io2req, io2file in zip(io2req_lines, io2file_lines): #add IO # to the beginning #io_num += 1;
new_line = line[:-1] # rm datalog_offset # new_line.append(data_line_num) # new_line.append(data_linelists[data_line_num]) #append both dataline addr and data new_line.append( data_linelists[data_line_num][1]) # append only dataline data) ret_lines.append(new_line) return ret_lines if __name__ == "__main__": #for fn in sys.argv[1]: io_head_fn = sys.argv[1] head_f = open(io_head_fn) head_lines = lines2lists(head_f) head_lines = formatline(head_lines) head_lines = reformatline(head_lines) # for line in head_lines: # line_str = ' '.join(line) # print line_str io_data_fn = sys.argv[2] dataline_width = int(sys.argv[3]) data_f = open(io_data_fn) data_lines = lines2lists(data_f) data_lines = format_dataline(data_lines) # for line in usefullines: # print line
import sys
import re
import tempfile
from subprocess import call, STDOUT

from format_strace import lines2lists


def rm_call_info(trace_list):
    """Strip the trailing CALL information from every trace record.

    from:
        1391577375342982 IO 3 68814848 1024 <data> REQ 3 OFFSET 68814848
        SIZE 1024 FILE casket.tcb CALL fsync casket.tcb
    to:
        1391577375342982 IO 3 68814848 1024 <data> REQ 3 OFFSET 68814848
        SIZE 1024 FILE casket.tcb

    trace_list is a list of field lists; only the first 14 fields of each
    record are kept. Records shorter than that are returned as copies.
    """
    return [line[:14] for line in trace_list]


if __name__ == "__main__":
    # Close the trace file as soon as it has been parsed.
    # NOTE(review): assumes lines2lists reads the whole file before
    # returning -- confirm against format_strace.
    with open(sys.argv[1]) as trace_file:
        trace_list = lines2lists(trace_file)

    for line in rm_call_info(trace_list):
        # Single-argument parenthesised print gives the same output on
        # Python 2 and Python 3.
        print(' '.join(line))
isLarge = False elif isLarge and head_cnt < head_length: #this and next are same req, but this is a large req; head = 0, 1, 2 this_io_str = ' '.join(this_io) tmp_list.append( this_io_str) #some io may be added twice; rm dup later head_cnt += 1 ret_list = list(set(tmp_list)) #rm duplicates #ret_list = sorted(ret_list) return ret_list if __name__ == "__main__": mapping_file = open(sys.argv[1]) #io2ReqAndFile mapping io_list = lines2lists(mapping_file) #file format: #IO 1 REQ 1 SIZE 1024 IOOFFSET1KB 1 FILE NA #IO 2 REQ 2 SIZE 4096 IOOFFSET1KB 72193 FILE NA #IO 3 REQ 3 SIZE 1024 IOOFFSET1KB 72322 FILE NA #IO 4 REQ 4 SIZE 1024 IOOFFSET1KB 49411 FILE journal #IO 5 REQ 5 SIZE 2048 IOOFFSET1KB 49412 FILE journal result_io = get_head_io(io_list) io_list = [] for io in result_io: line_list = io.split() line_list[1] = int(line_list[1]) #change IO # to int for sorting later io_list.append(line_list)
#!/usr/bin/env python #merge io2file.static.sim and io2file.dynamic.sim #/PATH/THISFILE io2file.static.sim io2file.dynamic.sim import sys import re from format_strace import lines2lists if __name__ == "__main__": #for fn in sys.argv[1]: io2file_static_fn = sys.argv[1] static_f = open(io2file_static_fn) static_lines = lines2lists(static_f) io2file_dynamic_fn = sys.argv[2] dynamic_f = open(io2file_dynamic_fn) dynamic_lines = lines2lists(dynamic_f) combined_list = [] for stat_l, dyna_l in zip(static_lines, dynamic_lines): #static line: 7 72877056 NA #dynamic line: 7 72877056 fsync casket.tcb.wal #stat_l.insert(2, " #STATIC_FILE: ") dynamic_info = [" CALL"] + dyna_l[2:] #dynamic_info = dyna_l[2:] new_line = stat_l + dynamic_info combined_list.append(new_line)