def __init__(self, tidargs, usrargs, sysargs): self.ep = None self.ep_ops = None self.true_ep_count = 0 # Count of true epochs self.null_ep_count = 0 # Count of null epochs self.cwrt_set = {} # ? self.call_chain = [] self.inter_arrival_duration = 0 self.prev_ep_end = 0 self.tid = tidargs[0] self.txid = tidargs[1] self.__tx_start = tidargs[2] self.__tx_end = 0.0 self.logfile = tidargs[5] self.log = None self.ppid = sysargs[0] # ? self.logdir = sysargs[6] # ? self.usrargs = usrargs self.flow = self.usrargs.flow self.est = ep_stats() self.ptype = [0, 0, 0, 0] self.data_writes = 0 self.meta_writes = 0
def cal_cross_thd_dep(pid, args): logdir = args[0] logfile = args[1] global wpid global est global lookback_time global f_to_lnos_m global f_to_itree_m wpid = pid est = ep_stats() print "Worker " + str(wpid) + " analyzing " + logfile onlyfiles = [f for f in listdir(logdir) if isfile(join(logdir, f)) and 'txt' in f and f != logfile] for f in onlyfiles: make_itree(logdir, f) make_lmap(logdir, f) sys.exit(0) # It takes 32 seconds until this point. Cannot use this ! ''' Core analysis begins here ''' fp = open(logdir + '/' + logfile, 'r') for l in fp: try: # this may fail due to out-range index epstr = l.split(';')[2] stime = est.get_stime_from_str(epstr) etime = est.get_etime_from_str(epstr) except: continue # A list of competing epochs for one guest thread, from all other threads concurrent_epochs = [] for f,__itree in f_to_itree_m.iteritems(): concurrent_epochs.append((f,list(__itree.search(stime - lookback_time,stime,strict=True)))) if wpid == 0: # This needs to go if len(concurrent_epochs) > 0: keys = l.split(';')[1].split(',') print str(stime-lookback_time) + '-' + str(stime), keys for (f,l_iv) in concurrent_epochs: print "Concur", f, l_iv lno_to_lines_m = f_to_lines_m[f] concurrent_addr_m = {} for iv in l_iv: lno = iv.data for addr in lno_to_lines_m[lno]: concurrent_addr_m[addr] = 0 for addr in keys: if addr in concurrent_addr_m: print "Deps", f, addr else: a = 1
def cal_cross_thd_dep(pid, args): logdir = args[0] logfile = args[1] global debug debug = int(args[2]) dl = 1 # You may empty this before every lookup but i don't cuz it can be reused recently_touched_addr = {} global wpid global est global lookback_time global f_to_lnnums_m global f_to_epochs_m global datadir n_cross = 0 n_self = 0 wpid = pid BATCH = 10000 progress = 0 cross_dep_addrs = set() cross_thread_deps_short = {} self_thread_deps_short = {} #if wpid != 2: # sys.exit(0) est = ep_stats() print "Worker " + str(wpid) + " analyzing " + logfile onlyfiles = [ f for f in listdir(logdir) if isfile(join(logdir, f)) and 'txt' in f ] # for f in onlyfiles: # make_index_by_stime(logdir, f) # make_index_by_etime(logdir, f) # make_lnmap(logdir, f) print \ ''' Core analysis begins here ''' ''' Added feature to detect and report cross thread dependencies on NVM addresses. It is a completely parallel algorithm. For each guest thread T, we iterate through all epochs in that thread. - For each epoch E in T, we maintain a global cache C of all epochs that started and ended between X and (X - t) secs in any guest thread. ... X is that starting time of the epoch E under consideration and t is configurable, preferably 50 usecs. - Then for each NVM addr A in E, we search C to find the most recent epoch E' that dirtied A. If E' is in T, it is a self-dependency else it is a cross-thread dependency. ''' with open(logdir + '/' + logfile, 'r') as fp: if debug > 0: fo = open(logdir + "/deps-" + logfile.split('.')[0], 'w') for lno, l in enumerate(fp): progress += 1 if (progress % BATCH) == 0: print "Worker", str(wpid), "finished", str( "{:,}".format(progress)), " epochs" try: ep_addr = l.split(';')[1].split(',') self_writers = l.split(';')[2].split(',') assert len(ep_addr) == len(self_writers) ep_summ = l.split(';')[ 3] # this may fail due to out-range index stime = est.get_stime_from_str(ep_summ) etime = est.get_etime_from_str(ep_summ) except: continue if debug > dl: print '>>>>', lno + 1, stime, etime, ep_addr for f in onlyfiles: #t = find_recent_past_ep(f, stime - lookback_time, stime) # Find epochs that ended before the current epoch in the last lookback_time secs t = find_recent_past_ep(f, etime - lookback_time, etime) if t is not None: sno = t[0] eno = t[1] if f == logfile: if eno >= lno + 1: print eno, lno + 1, f assert eno < lno + 1 ''' We're finding all epochs in (X - t) secs identified by line number (lno) in a per-thread log where each line is one epoch. The asserts check if the epoch is really in the interval (X - t) secs. ''' # print sno,'-',eno, f_to_lnnums_m[f][sno][1][0], f_to_lnnums_m[f][eno][1][1],f # start and end times of left most epoch falling in the interval (X - t) secs # Disabled : assert stime - lookback_time <= f_to_lnnums_m[f][sno][1][0] and f_to_lnnums_m[f][sno][1][1] <= stime # start and end times of right most epoch falling in the interval (X - t) secs # Disabled : assert stime - lookback_time <= f_to_lnnums_m[f][eno][1][0] and f_to_lnnums_m[f][eno][1][1] <= stime # Only assert the end time of the right most epoch is within the desired interval if debug > dl: print sno, etime - lookback_time, f_to_lnnums_m[f][ sno][1], etime print eno, etime - lookback_time, f_to_lnnums_m[f][ eno][1], etime assert etime - lookback_time <= f_to_lnnums_m[f][eno][1][ 1] and f_to_lnnums_m[f][eno][1][1] <= etime for ln in range(sno, eno + 1): if ln not in f_to_lnnums_m[f]: continue l_addr = f_to_lnnums_m[f][ln][0] tmstmp = f_to_lnnums_m[f][ln][1] other_writers = f_to_lnnums_m[f][ln][2] assert len(l_addr) == len(other_writers) # tmstmp[0] Start time of an epoch # tmstmp[1] End time of an epoch for i in range(0, len(l_addr)): addr = l_addr[i] o_wrt = other_writers[i] if addr not in recently_touched_addr: recently_touched_addr[addr] = (tmstmp[0], tmstmp[1], f, ln, o_wrt) else: ''' Temporary fix for carelessness in nstore. TODO : Remove later ''' a, xxx = tmstmp[1], tmstmp[0] c, xxx = recently_touched_addr[addr][ 1], recently_touched_addr[addr][0] ''' Last-writer-epoch owns the cache line. I think this is correct ''' # if (tmstmp[0], tmstmp[1], f, ln) > recently_touched_addr[addr]: if a > c: recently_touched_addr[addr] = (tmstmp[0], tmstmp[1], f, ln, o_wrt) # This tuple comparison is the key for this entire algorithm to work ''' A key assumption that makes this work is that epochs that race for NVM addresses will be rare or non-existent, which means dependent epochs will not race and strictly have a happens before relationship NOTE : This assumption breaks when authors don't bother to carefully issue fences. For eg, at the end of a txn PMFS and nstore don't bother to fence their writes to NVM leading to long epochs. This messes with my tools. ''' ''' What we've done so far is form a list of NVM addresses dirtied by all threads in the last t micro-seconds. This list also identifies the last epoch to dirty the addr. ''' nprint = {} for ea in ep_addr: nprint[ea] = 0 cross_thread_deps = set() self_thread_deps = set() for i in range(0, len(ep_addr)): ea = ep_addr[i] s_wrt = self_writers[i] if ea in recently_touched_addr: # Asserting that the starting time of last owning epoch is within X & (X - t) secs - GOOD ! b0 = (stime - lookback_time <= recently_touched_addr[ea][0] and recently_touched_addr[ea][0] <= stime) # Asserting that the ending time of last owning epoch is within X & (X - t) secs - GOOD ! b1 = (stime - lookback_time <= recently_touched_addr[ea][1] and recently_touched_addr[ea][1] <= stime) if not (b0 and b1): continue if nprint[ea] == 0 and debug > 1: # fo.write('>>>> ' + str(lno+1) + ' ' + str(stime - lookback_time) + ' ' + str(stime) + ' ' + str(ep_addr) + '\n') nprint[ea] = 1 ownership = "deadbeef" f, ln, w = recently_touched_addr[ea][ 2], recently_touched_addr[ea][ 3], recently_touched_addr[ea][4] if logfile != f: ownership = "cross_thread" if (f, ln, ea) not in cross_thread_deps: cross_thread_deps.add((f, ln, ea)) if debug > 0: fo.write(ownership + ' ' + str(ea) + ' (' + str(s_wrt) + ',' + str(lno + 1) + ') => ' + str(ea) + ' (' + str(w) + ',' + str(f) + ',' + str(ln) + ')\n') # W happened before s_wrt tstr = str(s_wrt) + ' => ' + str(w) + ' ' if tstr in cross_thread_deps_short: cross_thread_deps_short[tstr] += 1 else: cross_thread_deps_short[tstr] = 1 n_cross += 1 else: ownership = "self_thread" if (f, ln, ea) not in self_thread_deps: self_thread_deps.add((f, ln, ea)) if debug > 0: fo.write(ownership + ' ' + str(ea) + ' (' + str(s_wrt) + ',' + str(lno + 1) + ') => ' + str(ea) + ' (' + str(w) + ',' + str(f) + ',' + str(ln) + ')\n') # W happened before s_wrt tstr = str(s_wrt) + ' => ' + str(w) + ' ' if tstr in self_thread_deps_short: self_thread_deps_short[tstr] += 1 else: self_thread_deps_short[tstr] = 1 n_self += 1 # fantastic code, excellent use of data structures # excellent use of bisect algo, python tuple comparison features # excellent use of python set, and type-indenpendence features ! ''' What we've done so far is to list self- and cross- thread dependencies in the last X usecs using a cache of NVM addresses that is updated for each epoch and keeps track of the most recent owner epoch. Then we simply check this cache for the most recent owner epoch in a different SMT context ! ''' if debug > 0: fo.write("\n\n Summary of cross dependencies \n\n") for k, v in cross_thread_deps_short.items(): fo.write(str(k) + ':' + str("cross") + '\n') fo.write("\n\n Summary of self dependencies \n\n") for k, v in self_thread_deps_short.items(): fo.write(str(k) + ':' + str("self") + '\n') fo.close() # print logfile, "n_cross=", n_cross," n_self=", n_self print logfile, "n_cross=", n_cross #, sorted(list(cross_dep_addrs)) print logfile, "n_self=", n_self #, sorted(list(cross_dep_addrs)) print "Worker", str(wpid), "finished", str( "{:,}".format(progress)), " epochs"
def digest(usrargs, sysargs): n_tl = 0 BATCH = 100000 # Worker emits stmt after processing BATCH entries tid = -1 m_threads = {} #tls est = ep_stats() # pmap[wpid] = Process(target=digest, args=(args, [wpid, [], 100000, w, None, [-1], logdir])) tfile = usrargs.tfile ttype = usrargs.ttype pt = int(usrargs.pt) pid = sysargs[0] exclude = sysargs[1] include = sysargs[ 5] # List of host tids you want to examine, for internal use only if pt > 0: op = open("/dev/shm/" + \ str(os.path.basename(tfile).split('.')[0]) + \ '_' + str(pid) + '.t', 'w') if ttype == 'ftrace': tread = ftread(usrargs, sysargs) elif ttype == 'utrace': tread = utread(usrargs, sysargs) try: if 'gz' in tfile: cmd = "zcat " + tfile else: print "Please provide a compressed gzipped trace file" sys.exit(0) except: print "Unable to open ", tfile sys.exit(errno.EIO) try: #for i in range(0,1): for tl in os.popen(cmd, 'r', 32768): # input is global te = tread.get_tentry(tl) if te is None: continue if te.get_tid() in exclude: continue n_tl += 1 if (n_tl % BATCH == 0): print "Worker ", pid, "completed ", \ str("{:,}".format(n_tl)) , " trace entries" if pt > 0: # pt = 1 if -1 in include: # Write all op.write(tl) op.flush() elif te.get_tid() in include: # Write only specific tids op.write(tl) op.flush() if te.get_tid() != tid: tid = te.get_tid() if tid not in m_threads: m_threads[tid] = smt(tid, usrargs, sysargs) curr = m_threads[tid] if pt > 1: # pt = 2 l = te.te_list() op.write('te = ' + str(l) + '\n') op.flush() # curr.update_call_chain(caller, callee) ep = curr.do_tentry(te) if ep is not None: if pt > 2: # pt = 3 op.write('ep = ' + str(ep.ep_list()) + '\n') op.flush() t = est.get_tuple(ep) if pt > 3: # pt = 4 op.write('tu[' + str(t_buf_len - 1) + '] = ' \ + str(t_buf[t_buf_len-1]) + '\n') op.flush() except: sys.exit(0)