def slice_closure(tbdict): print "Performing slice closure..." fp_iterations = 0 outerst = time.time() while True: innerst = time.time() wlist = defaultdict(list) for t in tbdict: tbs = tbdict[t] for insns in zip(*[r.body for r in tbs]): if (any(insn.in_slice for _,insn in insns) and not all(insn.in_slice for _,insn in insns)): for j, (n,x) in enumerate(insns): if not x.in_slice: wlist[tbs[j].trace].append( (n, uses(x)) ) x.mark() if not wlist: break for trace in wlist: multislice(trace, wlist[trace]) fp_iterations += 1 innered = time.time() print "Sliced %d new instructions in %s" % (sum(len(w) for w in wlist.values()), datetime.timedelta(seconds=innered-innerst)) outered = time.time() print "Reached fixed point after %d iterations, time: %s" % (fp_iterations, datetime.timedelta(seconds=outered-outerst))
def set_input(trace, inbufs):
    """Turn every entry that reads an input buffer into an IFLO_SET_INPUT op.

    trace:  iterable of (index, entry) pairs
    inbufs: iterable of locations to treat as inputs

    An entry whose uses() overlap ``inbufs`` must define exactly one
    location (asserted).  Its ``op`` is replaced by "IFLO_SET_INPUT" and its
    ``args`` by ``[<that location>, 0]``.  Entries are modified in place and
    nothing is returned.
    """
    input_locs = set(inbufs)
    for _, entry in trace:
        if not input_locs.intersection(uses(entry)):
            continue
        targets = defines(entry)
        assert len(targets) == 1
        entry.op = "IFLO_SET_INPUT"
        entry.args = [targets[0], 0]
def reroll_loops(trace, slice, debug=False): slice_indices = set(s[0] for s in slice) loops = detect_loops(trace) # Filter the loops -- ignore any that aren't part of the slice loops_in_slice = [] for loop in loops: # Filter: For all TBs in the loop, there exists an insn in the # TB that is in the slice. We ignore the last loop body since # it just contains insns to exit the loop. if all( any(insn[0] in slice_indices for insn in tb.body) for tb in loop[:-1]): print "Found a loop of length", len( loop), "which has data flow through it" loops_in_slice.append(Loop(loop)) if debug: for i, loop in enumerate(loops_in_slice): print "----- Begin Loop %d -----" % i loop.dump() print "----- End Loop %d -----" % i slice_dict = dict(slice) for loop in loops_in_slice: # We need to do this for every TB because we need to specify # explicitly the data flow for the loop condition -- it has no # defines, only uses. loop_slice = {} for tb in loop.tbs: loop_start, loop_end = tb.range() tb_slice = dynslice(trace, uses(loop.condition), start=loop_end) loop_slice.update(tb_slice) if debug: print "Loop slice:" for l in sorted(loop_slice.items()): print l slice_dict.update(loop_slice) # Now prune the loop exemplars so they only contain sliced instructions newslice = sorted(slice_dict.items()) for loop in loops_in_slice: loop.prune(newslice) # Slice surgery: get rid of the unrolled loops, and insert the # loop object in their place for loop in loops_in_slice: loop_start, loop_end = loop.range() slice_dict = del_slice_range(loop_start, loop_end, slice_dict) slice_dict[loop.pos()] = loop newslice = sorted(slice_dict.items()) return newslice
def multislice(insns, worklist, output_track=False, debug=False): wlist = worklist[:] wlist.sort() start, bufs = wlist.pop() if wlist: next_i, next_bufs = wlist.pop() else: next_i = -1 if start == -1: start = len(insns) - 1 if output_track: outbufs = set(bufs) if not quiet: widgets = ['Slicing: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA()] pbar = ProgressBar(widgets=widgets, maxval=start+1).start() work = set(bufs) if debug: print "Initial working set:", work for i in range(start, -1, -1): #print "Examining instruction",i,"(working set: %d)" % len(work) if not quiet: pbar.update(start-i+1) insn = insns[i] if i == next_i: work |= set(next_bufs) if wlist: next_i, next_bufs = wlist.pop() defs_set = set(defines(insn)) uses_set = set(uses(insn)) if debug: print repr(insn) if defs_set & work: if debug: print "Overlap with working set: %s" % (defs_set & work) work -= defs_set work |= uses_set if debug: print "Adding to slice: %s" % repr(insn) if debug: print "Current WS:", work # TODO: allow multiple outputs by separating outbufs into # a dict of (label => memrange) pairs if output_track and defs_set & outbufs: if debug: print "Accounted for %d of %d output bytes" % (len(defs_set & outbufs), len(outbufs)) if debug: print "Instruction: %s" % repr(insn) outbufs -= defs_set insn.set_output_label("out") insn.mark() if debug: print "Working set at end:", work
def dynslice(insns, bufs, start=-1, output_track=False, debug=False): """Perform a dynamic data slice. Perform a dynamic data slice of a trace with respect to a set of buffers. This is basically the algorithm described in the K-Tracer paper. insns: a list of tuples: (index, TraceEntry) bufs: a list of outputs to be tracked start: an optional point in the trace at which to begin analysis. By default, analysis begins at the last instruction in the trace. ouput_track: mark instructions that define data in the output set. This calls TraceEntry.set_output_label(). debug: enable debugging information Returns: a list of tuples: (index, TraceEntry) """ if start == -1: start = len(insns) - 1 if output_track: outbufs = set(bufs) work = set(bufs) slice = [] for i, insn in reversed(insns[:start + 1]): defs_set = set(defines(insn)) uses_set = set(uses(insn)) if debug: print repr(insn) if defs_set & work: if debug: print "Overlap with working set: %s" % (defs_set & work) work = (work - defs_set) | uses_set if debug: print "Adding to slice: %s" % repr(insn) # TODO: allow multiple outputs by separating outbufs into # a dict of (label => memrange) pairs if output_track and defs_set & outbufs: outbufs -= defs_set insn.set_output_label("out") slice.insert(0, (i, insn)) if debug: print "Working set at end:", work return slice
def linked_vars(insns, sink, source, start=-1, end=0, debug=False): if debug: print "Linking vars sink: %s source: %s between trace positions %d and %d" % (sink, source, end, start) if start == -1: start = len(insns) - 1 work = set([sink]) for i,insn in reversed(insns[end:start+1]): defs_set = set(defines(insn)) uses_set = set(uses(insn)) # For this one special case we DON'T want to track # the derivation of the address of a buffer. if is_memop(insn): uses_set -= set(["A0"]) if defs_set & work: work = (work - defs_set) | uses_set if debug: print i,repr(insn) if debug: print "Working set at end:", work return source in work
def control_dep_slice(tbdict, cfg): print "Calculating control dependencies..." start_ts = time.time() wlist = defaultdict(list) for c in cfg: if len(cfg[c]) < 2: continue for t in tbdict[c]: for i, isn in t.body: if is_jcc(isn.op) or is_dynjump(isn.op): wlist[t.trace].append( (i, uses(isn)) ) isn.mark() wlist = dict(wlist) for trace in wlist: multislice(trace, wlist[trace]) end_ts = time.time() print "Added branches in %s" % (datetime.timedelta(seconds=end_ts-start_ts))
def linked_vars(insns, sink, source, start=-1, end=0, debug=False): if debug: print "Linking vars sink: %s source: %s between trace positions %d and %d" % ( sink, source, end, start) if start == -1: start = len(insns) - 1 work = set([sink]) for i, insn in reversed(insns[end:start + 1]): defs_set = set(defines(insn)) uses_set = set(uses(insn)) # For this one special case we DON'T want to track # the derivation of the address of a buffer. if is_memop(insn): uses_set -= set(["A0"]) if defs_set & work: work = (work - defs_set) | uses_set if debug: print i, repr(insn) if debug: print "Working set at end:", work return source in work
def isolate_self_ints(trace): apic_base = 0xfee00000 apic_tpr = apic_base + 0x80 selfints = find_self_interrupts(trace) idt = {} int_edges = set() to_delete = [] wlist = [] for self_int, tpr_set, int_start, int_end in selfints: int_site = first(lambda x: x.op == 'IFLO_TB_HEAD_EIP', reversed(trace[int_start-50:int_start])) int_handler = trace[int_start+1] ret = first(lambda x: x.op == 'IFLO_TB_HEAD_EIP', reversed(trace[int_end-50:int_end])) retsite = first(lambda x: x.op == 'IFLO_TB_HEAD_EIP', trace[int_end:int_end+20]) int_edges.add( (int_site.args[0], int_handler.args[0]) ) int_edges.add( (ret.args[0], retsite.args[0]) ) to_delete += [int_start, int_end] vec = trace[int_start].args[0] print "Detected self-interrupt, edges: %#x -> %#x, %#x -> %#x, vector: %#x (%#x)" % ( int_site.args[0], int_handler.args[0], ret.args[0], retsite.args[0], vec, trace[self_int].args[-1] & 0xff, ) idt[vec] = int_handler.args[0] # Gotta include the one right before the ICR set as well i = self_int while i > 0: e = trace[i] if e.op.startswith('IFLO_OPS_MEM_STL') and e.args[1] == apic_tpr: e.mark() wlist.append( (i, uses(e)) ) break i -= 1 # Noops used here as temporary placeholders. We will # delete them after slicing, but deletion now would mess # up our indices trace[int_start] = TraceEntry(('IFLO_NOOP', [])) trace[int_end] = TraceEntry(('IFLO_NOOP', [])) wlist.append( (self_int, uses(trace[self_int])) ) wlist.append( (tpr_set, uses(trace[tpr_set])) ) trace[self_int].mark() trace[tpr_set].mark() if wlist: multislice(trace, wlist) # Remove the actual interrupt/irets to_delete.sort() while to_delete: del trace[to_delete.pop()] trace, tbs, tbdict, cfg = remake_trace(trace) for s,d in int_edges: try: cfg[s].remove(d) except KeyError: pass return trace, tbs, tbdict, cfg, idt
idx, insn = trace[i] if (insn.op == 'IFLO_OPS_MEM_LDL_T0_A0' or insn.op == 'IFLO_OPS_MEM_LDL_T1_A0'): memb_addr = insn.args[1] memb_val = insn.args[2] #print "Pointer dereference => Read 4 bytes at %#x = %#x" % (memb_addr,memb_val) # Tiny version of dynamic slicing -- track A0 until it comes from somewhere else j = i work = set(['A0']) slice = [] while True: j -= 1 _, trcent = trace[j] defs_set = set(defines(trcent)) uses_set = set(uses(trcent)) if defs_set & work: work = (work - defs_set) | uses_set slice.append((j, trcent)) if 'A0' not in work: break valid = False objbase = UInt(memb_addr) for i, s in slice[:-1]: if s.op == "IFLO_ADDL_A0_IM": objbase -= UInt(s.args[0]) elif s.op == "IFLO_ADDL_A0_SEG": pass elif s.op == "IFLO_MOVL_A0_IM": objbase -= UInt(s.args[0]) else:
for i in xrange(len(trace)-1,-1,-1): idx, insn = trace[i] if (insn.op == 'IFLO_OPS_MEM_LDL_T0_A0' or insn.op == 'IFLO_OPS_MEM_LDL_T1_A0'): memb_addr = insn.args[1] memb_val = insn.args[2] #print "Pointer dereference => Read 4 bytes at %#x = %#x" % (memb_addr,memb_val) # Tiny version of dynamic slicing -- track A0 until it comes from somewhere else j = i work = set(['A0']) slice = [] while True: j -= 1 _ , trcent = trace[j] defs_set = set(defines(trcent)) uses_set = set(uses(trcent)) if defs_set & work: work = (work - defs_set) | uses_set slice.append((j,trcent)) if 'A0' not in work: break valid = False objbase = UInt(memb_addr) for i,s in slice[:-1]: if s.op == "IFLO_ADDL_A0_IM": objbase -= UInt(s.args[0]) elif s.op == "IFLO_ADDL_A0_SEG": pass elif s.op == "IFLO_MOVL_A0_IM": objbase -= UInt(s.args[0]) else: