Esempio n. 1
0
File: txn.py Progetto: snalli/eliza
    def __init__(self, tidargs, usrargs, sysargs):

        self.ep = None
        self.ep_ops = None
        self.true_ep_count = 0  # Count of true epochs
        self.null_ep_count = 0  # Count of null epochs
        self.cwrt_set = {}  # ?
        self.call_chain = []
        self.inter_arrival_duration = 0
        self.prev_ep_end = 0

        self.tid = tidargs[0]
        self.txid = tidargs[1]
        self.__tx_start = tidargs[2]
        self.__tx_end = 0.0
        self.logfile = tidargs[5]
        self.log = None

        self.ppid = sysargs[0]  # ?
        self.logdir = sysargs[6]  # ?

        self.usrargs = usrargs
        self.flow = self.usrargs.flow

        self.est = ep_stats()
        self.ptype = [0, 0, 0, 0]
        self.data_writes = 0
        self.meta_writes = 0
Esempio n. 2
0
def cal_cross_thd_dep(pid, args):

	logdir = args[0]
	logfile = args[1]
	global wpid
	global est
	global lookback_time
	global f_to_lnos_m
	global f_to_itree_m
	wpid = pid
	est = ep_stats()
	print "Worker " + str(wpid) + " analyzing " + logfile

	onlyfiles = [f for f in listdir(logdir) if isfile(join(logdir, f)) and 'txt' in f and f != logfile]
	
	for f in onlyfiles:
		make_itree(logdir, f)
		make_lmap(logdir, f)

	sys.exit(0) # It takes 32 seconds until this point. Cannot use this !
	''' Core analysis begins here ''' 
	fp = open(logdir + '/' + logfile, 'r')
	for l in fp:
		
		try:
			# this may fail due to out-range index
			epstr = l.split(';')[2]
			stime = est.get_stime_from_str(epstr)
			etime = est.get_etime_from_str(epstr)
		except:
			continue
		
		# A list of competing epochs for one guest thread, from all other threads
		concurrent_epochs = []
		for f,__itree in f_to_itree_m.iteritems():
			concurrent_epochs.append((f,list(__itree.search(stime - lookback_time,stime,strict=True))))

		if wpid == 0: # This needs to go
			if len(concurrent_epochs) > 0:
				keys = l.split(';')[1].split(',')
				print str(stime-lookback_time) + '-' + str(stime), keys
				
				for (f,l_iv) in concurrent_epochs:
					print "Concur", f, l_iv
					lno_to_lines_m = f_to_lines_m[f]

					concurrent_addr_m = {}
					for iv in l_iv:
						lno = iv.data
						for addr in lno_to_lines_m[lno]:
							concurrent_addr_m[addr] = 0

					for addr in keys:
							if addr in concurrent_addr_m:
								print "Deps", f, addr
							else:
								a = 1
Esempio n. 3
0
def cal_cross_thd_dep(pid, args):

    logdir = args[0]
    logfile = args[1]
    global debug
    debug = int(args[2])
    dl = 1
    # You may empty this before every lookup but i don't cuz it can be reused
    recently_touched_addr = {}
    global wpid
    global est
    global lookback_time
    global f_to_lnnums_m
    global f_to_epochs_m
    global datadir
    n_cross = 0
    n_self = 0
    wpid = pid
    BATCH = 10000
    progress = 0
    cross_dep_addrs = set()
    cross_thread_deps_short = {}
    self_thread_deps_short = {}
    #if wpid != 2:
    #	sys.exit(0)
    est = ep_stats()
    print "Worker " + str(wpid) + " analyzing " + logfile

    onlyfiles = [
        f for f in listdir(logdir) if isfile(join(logdir, f)) and 'txt' in f
    ]

    # for f in onlyfiles:
    # make_index_by_stime(logdir, f)
    # make_index_by_etime(logdir, f)
    # make_lnmap(logdir, f)

    print \
    ''' Core analysis begins here '''
    '''
    Added feature to detect and report cross thread dependencies
    on NVM addresses. It is a completely parallel algorithm.
    
    For each guest thread T, we iterate through all epochs in that thread.
    - For each epoch E in T, we maintain a global cache C of all epochs that started and
      ended between X and (X - t) secs in any guest thread.
      ... X is that starting time of the epoch E under consideration
          and t is configurable, preferably 50 usecs.
    - Then for each NVM addr A in E, we search C to find the most recent
      epoch E' that dirtied A. If E' is in T, it is a self-dependency else
      it is a cross-thread dependency.

	'''
    with open(logdir + '/' + logfile, 'r') as fp:
        if debug > 0:
            fo = open(logdir + "/deps-" + logfile.split('.')[0], 'w')
        for lno, l in enumerate(fp):

            progress += 1
            if (progress % BATCH) == 0:
                print "Worker", str(wpid), "finished", str(
                    "{:,}".format(progress)), " epochs"

            try:
                ep_addr = l.split(';')[1].split(',')
                self_writers = l.split(';')[2].split(',')
                assert len(ep_addr) == len(self_writers)
                ep_summ = l.split(';')[
                    3]  # this may fail due to out-range index
                stime = est.get_stime_from_str(ep_summ)
                etime = est.get_etime_from_str(ep_summ)
            except:
                continue

            if debug > dl:
                print '>>>>', lno + 1, stime, etime, ep_addr
            for f in onlyfiles:

                #t = find_recent_past_ep(f, stime - lookback_time, stime)
                # Find epochs that ended before the current epoch in the last lookback_time secs
                t = find_recent_past_ep(f, etime - lookback_time, etime)
                if t is not None:
                    sno = t[0]
                    eno = t[1]
                    if f == logfile:
                        if eno >= lno + 1:
                            print eno, lno + 1, f
                            assert eno < lno + 1
                    '''
						We're finding all epochs in (X - t) secs identified
						by line number (lno) in a per-thread log where each line
						is one epoch. The asserts check if the epoch is really
						in the interval (X - t) secs.
					'''
                    # print sno,'-',eno, f_to_lnnums_m[f][sno][1][0], f_to_lnnums_m[f][eno][1][1],f
                    # start and end times of left  most epoch falling in the interval (X - t) secs
                    # Disabled : assert stime - lookback_time <= f_to_lnnums_m[f][sno][1][0] and f_to_lnnums_m[f][sno][1][1] <= stime
                    # start and end times of right most epoch falling in the interval (X - t) secs
                    # Disabled : assert stime - lookback_time <= f_to_lnnums_m[f][eno][1][0] and f_to_lnnums_m[f][eno][1][1] <= stime

                    # Only assert the end time of the right most epoch is within the desired interval
                    if debug > dl:
                        print sno, etime - lookback_time, f_to_lnnums_m[f][
                            sno][1], etime
                        print eno, etime - lookback_time, f_to_lnnums_m[f][
                            eno][1], etime

                    assert etime - lookback_time <= f_to_lnnums_m[f][eno][1][
                        1] and f_to_lnnums_m[f][eno][1][1] <= etime
                    for ln in range(sno, eno + 1):
                        if ln not in f_to_lnnums_m[f]:
                            continue

                        l_addr = f_to_lnnums_m[f][ln][0]
                        tmstmp = f_to_lnnums_m[f][ln][1]
                        other_writers = f_to_lnnums_m[f][ln][2]
                        assert len(l_addr) == len(other_writers)
                        # tmstmp[0] Start time of an epoch
                        # tmstmp[1] End time of an epoch
                        for i in range(0, len(l_addr)):
                            addr = l_addr[i]
                            o_wrt = other_writers[i]
                            if addr not in recently_touched_addr:
                                recently_touched_addr[addr] = (tmstmp[0],
                                                               tmstmp[1], f,
                                                               ln, o_wrt)
                            else:
                                ''' Temporary fix for carelessness in nstore. TODO : Remove later '''
                                a, xxx = tmstmp[1], tmstmp[0]
                                c, xxx = recently_touched_addr[addr][
                                    1], recently_touched_addr[addr][0]
                                ''' Last-writer-epoch owns the cache line. I think this is correct '''
                                # if (tmstmp[0], tmstmp[1], f, ln) > recently_touched_addr[addr]:
                                if a > c:
                                    recently_touched_addr[addr] = (tmstmp[0],
                                                                   tmstmp[1],
                                                                   f, ln,
                                                                   o_wrt)
                                # This tuple comparison is the key for this entire algorithm to work
                                '''
									A key assumption that makes this work is that epochs that race
									for NVM addresses will be rare or non-existent, which means dependent epochs 
									will not race and strictly have a happens before relationship 
									
									NOTE : This assumption breaks when authors don't bother to carefully
									issue fences. For eg, at the end of a txn PMFS and nstore don't bother
									to fence their writes to NVM leading to long epochs. This messes with my
									tools.
								'''
            ''' 
				What we've done so far is form a list of NVM addresses
				dirtied by all threads in the last t micro-seconds.
				This list also identifies the last epoch to dirty the addr.
			'''

            nprint = {}
            for ea in ep_addr:
                nprint[ea] = 0

            cross_thread_deps = set()
            self_thread_deps = set()
            for i in range(0, len(ep_addr)):
                ea = ep_addr[i]
                s_wrt = self_writers[i]
                if ea in recently_touched_addr:
                    # Asserting that the starting time of last owning epoch is within X & (X - t) secs - GOOD !
                    b0 = (stime - lookback_time <= recently_touched_addr[ea][0]
                          and recently_touched_addr[ea][0] <= stime)
                    # Asserting that the ending time of last owning epoch is within X & (X - t) secs - GOOD !
                    b1 = (stime - lookback_time <= recently_touched_addr[ea][1]
                          and recently_touched_addr[ea][1] <= stime)
                    if not (b0 and b1):
                        continue

                    if nprint[ea] == 0 and debug > 1:
                        # fo.write('>>>> ' + str(lno+1) + ' ' + str(stime - lookback_time) + ' ' + str(stime) + ' ' + str(ep_addr) + '\n')
                        nprint[ea] = 1

                    ownership = "deadbeef"
                    f, ln, w = recently_touched_addr[ea][
                        2], recently_touched_addr[ea][
                            3], recently_touched_addr[ea][4]
                    if logfile != f:
                        ownership = "cross_thread"
                        if (f, ln, ea) not in cross_thread_deps:
                            cross_thread_deps.add((f, ln, ea))
                            if debug > 0:
                                fo.write(ownership + ' ' + str(ea) + ' (' +
                                         str(s_wrt) + ',' + str(lno + 1) +
                                         ') => ' + str(ea) + ' (' + str(w) +
                                         ',' + str(f) + ',' + str(ln) + ')\n')
                                # W happened before s_wrt
                                tstr = str(s_wrt) + ' => ' + str(w) + ' '
                                if tstr in cross_thread_deps_short:
                                    cross_thread_deps_short[tstr] += 1
                                else:
                                    cross_thread_deps_short[tstr] = 1
                            n_cross += 1
                    else:
                        ownership = "self_thread"
                        if (f, ln, ea) not in self_thread_deps:
                            self_thread_deps.add((f, ln, ea))
                            if debug > 0:
                                fo.write(ownership + ' ' + str(ea) + ' (' +
                                         str(s_wrt) + ',' + str(lno + 1) +
                                         ') => ' + str(ea) + ' (' + str(w) +
                                         ',' + str(f) + ',' + str(ln) + ')\n')
                                # W happened before s_wrt
                                tstr = str(s_wrt) + ' => ' + str(w) + ' '
                                if tstr in self_thread_deps_short:
                                    self_thread_deps_short[tstr] += 1
                                else:
                                    self_thread_deps_short[tstr] = 1
                            n_self += 1

                        # fantastic code, excellent use of data structures
                        # excellent use of bisect algo, python tuple comparison features
                        # excellent use of python set, and type-indenpendence features !
            '''
				What we've done so far is to list self- and cross- thread
				dependencies in the last X usecs using a cache of NVM addresses
				that is updated for each epoch and keeps track of the most
				recent owner epoch. Then we simply check this cache for the most
				recent owner epoch in a different SMT context !
			'''
    if debug > 0:
        fo.write("\n\n Summary of cross dependencies \n\n")
        for k, v in cross_thread_deps_short.items():
            fo.write(str(k) + ':' + str("cross") + '\n')
        fo.write("\n\n Summary of self dependencies \n\n")
        for k, v in self_thread_deps_short.items():
            fo.write(str(k) + ':' + str("self") + '\n')
        fo.close()
    # print logfile, "n_cross=", n_cross," n_self=", n_self
    print logfile, "n_cross=", n_cross  #, sorted(list(cross_dep_addrs))
    print logfile, "n_self=", n_self  #, sorted(list(cross_dep_addrs))
    print "Worker", str(wpid), "finished", str(
        "{:,}".format(progress)), " epochs"
Esempio n. 4
0
File: main.py Progetto: snalli/eliza
def digest(usrargs, sysargs):

    n_tl = 0
    BATCH = 100000  # Worker emits stmt after processing BATCH entries
    tid = -1
    m_threads = {}  #tls
    est = ep_stats()

    # pmap[wpid] = Process(target=digest, args=(args, [wpid, [], 100000, w, None, [-1], logdir]))
    tfile = usrargs.tfile
    ttype = usrargs.ttype
    pt = int(usrargs.pt)

    pid = sysargs[0]
    exclude = sysargs[1]
    include = sysargs[
        5]  # List of host tids you want to examine, for internal use only

    if pt > 0:
        op = open("/dev/shm/" + \
         str(os.path.basename(tfile).split('.')[0]) + \
         '_' + str(pid) + '.t', 'w')

    if ttype == 'ftrace':
        tread = ftread(usrargs, sysargs)
    elif ttype == 'utrace':
        tread = utread(usrargs, sysargs)

    try:
        if 'gz' in tfile:
            cmd = "zcat " + tfile
        else:
            print "Please provide a compressed gzipped trace file"
            sys.exit(0)
    except:
        print "Unable to open ", tfile
        sys.exit(errno.EIO)

    try:  #for i in range(0,1):
        for tl in os.popen(cmd, 'r', 32768):  # input is global

            te = tread.get_tentry(tl)

            if te is None:
                continue

            if te.get_tid() in exclude:
                continue

            n_tl += 1

            if (n_tl % BATCH == 0):
                print "Worker ", pid, "completed ", \
                str("{:,}".format(n_tl)) , " trace entries"

            if pt > 0:  # pt = 1
                if -1 in include:
                    # Write all
                    op.write(tl)
                    op.flush()
                elif te.get_tid() in include:
                    # Write only specific tids
                    op.write(tl)
                    op.flush()

            if te.get_tid() != tid:
                tid = te.get_tid()
                if tid not in m_threads:
                    m_threads[tid] = smt(tid, usrargs, sysargs)
                curr = m_threads[tid]

            if pt > 1:  # pt = 2
                l = te.te_list()
                op.write('te = ' + str(l) + '\n')
                op.flush()

            # curr.update_call_chain(caller, callee)

            ep = curr.do_tentry(te)

            if ep is not None:
                if pt > 2:  # pt = 3
                    op.write('ep = ' + str(ep.ep_list()) + '\n')
                    op.flush()

                t = est.get_tuple(ep)
                if pt > 3:  # pt = 4
                    op.write('tu[' + str(t_buf_len - 1) + '] = ' \
                    + str(t_buf[t_buf_len-1]) + '\n')
                    op.flush()
    except:
        sys.exit(0)