def load_leaks(files, keys, source):
    """Parse binary leak files and report every stored leak to ``leaks``.

    Each file is read completely and decoded as a stream of records. A
    record starts with a one-byte type tag, followed by a tag-specific
    payload of little-endian unsigned 64-bit integers:

    * ``FUNC_ENTRY``: caller and callee; a ``Context`` built from them is
      pushed onto a per-file ``CallStack``.
    * ``FUNC_EXIT``: no payload; the current context is popped.
    * ``CFLEAK`` / ``DLEAK``: instruction pointer and evidence count,
      followed by that many evidence values. A ``CFLeak`` / ``DataLeak``
      carrying one ``EvidenceEntry`` is reported via
      ``leaks.report_leak(cs, leak, True)``.

    Args:
        files: Sequence of paths of the leak files to parse.
        keys: Sequence of key-file paths indexed in parallel with
            ``files``, or ``None``. When given, the raw content of
            ``keys[i]`` is attached to every evidence entry of
            ``files[i]``; otherwise ``None`` is attached.
        source: Passed through unchanged to every ``EvidenceEntry``.

    Raises:
        AssertionError: If a record carries an unknown type tag.
        struct.error: If a record is truncated.
    """
    for i, path in enumerate(files):
        with open(path, 'rb') as f:
            chunk = f.read()
        if keys is not None:
            with open(keys[i], 'rb') as fk:
                k = fk.read()
        else:
            k = None

        idx = 0
        cs = CallStack()
        while idx < len(chunk):
            typ = struct.unpack_from('B', chunk, idx)[0]
            idx += 1
            if typ == Type.FUNC_ENTRY.value:
                (caller, callee) = struct.unpack_from('<QQ', chunk, idx)
                idx += 16
                debug(2, "FUNC_ENTRY %x->%x", (caller, callee))
                cs.docall_context(Context(caller, callee))
            elif typ == Type.FUNC_EXIT.value:
                debug(2, "FUNC_EXIT")
                cs.doreturn_context()
            elif typ == Type.CFLEAK.value or typ == Type.DLEAK.value:
                # Both leak kinds share one on-disk layout; only the label
                # and the leak class differ.
                is_cf = typ == Type.CFLEAK.value
                (ip, no) = struct.unpack_from('<QQ', chunk, idx)
                idx += 16
                debug(2, ("CFLEAK" if is_cf else "DLEAK") + " %x (%d)", (ip, no))
                evidence = struct.unpack_from('<' + 'Q' * no, chunk, idx)
                debug(2, str(evidence))
                idx += no * 8
                leak = CFLeak(ip) if is_cf else DataLeak(ip)
                ee = EvidenceEntry(evidence, k, source)
                leak.add_evidence(ee)
                if debuglevel(3):
                    cs.doprint_reverse()
                leaks.report_leak(cs, leak, True)
            else:
                debug(0, "Unknown type")
                # Raised explicitly: a bare ``assert False`` is removed
                # under ``python -O`` and parsing would continue on garbage.
                raise AssertionError(
                    "Unknown record type %d at offset %d in %s"
                    % (typ, idx - 1, path))
def lookahead(self, i):
    """Peek at the entry at position ``i`` of the pending queue.

    The queue is refilled via ``self.refill()`` until it holds more than
    ``i`` entries. The entry is not removed from the queue.

    Args:
        i: Zero-based position in ``self.q`` to inspect.

    Returns:
        The entry at position ``i``, or ``None`` if ``self.refill()``
        reports that nothing more can be loaded before the queue is long
        enough.
    """
    while self.q.qsize() <= i:
        if self.refill():
            continue
        return None
    peeked = self.q.queue[i]
    if debuglevel(4):
        self.debug(4, str(peeked))
    return peeked
def report_dataleak(callstack, e1, e2):
    """Report a data leak built from two differing trace entries.

    A ``DataLeak`` is created at ``e1.ip``, one ``DataLeakEntry`` is
    appended for the ``data`` value of each entry (``e1`` first), and the
    leak is passed to ``leaks.report_leak`` under ``callstack``.

    Args:
        callstack: Calling context under which the leak is reported.
        e1: Entry from the first trace; supplies the leak's ``ip``.
        e2: Entry from the second trace.
    """
    debug(1, "Data leak@ %08x: %08x vs %08x", (e1.ip, e1.data, e2.data))
    if debuglevel(3):
        callstack.doprint_reverse()
    found = DataLeak(e1.ip)
    for entry in (e1, e2):
        found.append(DataLeakEntry(entry.data))
    leaks.report_leak(callstack, found)
def report_cfleak(callstack, bp, mp, e1, len1, e2, len2):
    """Report a control-flow leak between two diverging traces.

    A ``CFLeak`` is created at ``bp``, one ``CFLeakEntry`` per trace is
    appended (``e1`` first), each tagged with the merge point's ``ip``, and
    the leak is passed to ``leaks.report_leak`` under ``callstack``.

    Args:
        callstack: Calling context under which the leak is reported.
        bp: Address at which the traces diverged.
        mp: Merge point; its ``ip`` and ``type`` attributes are used.
        e1: First entry of the first trace after the divergence.
        len1: Length value stored with ``e1``.
        e2: First entry of the second trace after the divergence.
        len2: Length value stored with ``e2``.
    """
    fmt = "Control flow leak@BB %08x, merging@%08x(%s): %08x(%s)(+%d) vs %08x(%s)(+%d)"
    fmt_args = (
        bp,
        mp.ip,
        Type(mp.type).name,
        e1.ip,
        Type(e1.type),
        len1,
        e2.ip,
        Type(e2.type),
        len2,
    )
    debug(1, fmt, fmt_args)
    if debuglevel(3):
        callstack.doprint_reverse()
    found = CFLeak(bp)
    for entry, length in ((e1, len1), (e2, len2)):
        found.append(CFLeakEntry(entry, length, mp.ip))
    leaks.report_leak(callstack, found)
def refill_chunk(self):
    """Decode ``self.chunk`` into entries and append them to ``self.q``.

    The chunk is unpacked as a sequence of little-endian ``BQQ`` records
    (one unsigned byte followed by two unsigned 64-bit integers), and an
    ``Entry`` is built from each 3-tuple. For every entry that
    ``Type.isbranch`` accepts and whose ``data`` is non-zero, a second
    ``FUNC_BBL`` entry carrying that ``data`` value is queued right
    behind it. ``self.chunk`` is reset to ``None`` afterwards.

    ``bs`` is a module-level record size defined elsewhere; presumably it
    equals the packed size of ``"<BQQ"`` -- TODO confirm.

    Raises:
        AssertionError: If ``self.chunk`` is ``None`` or its length is not
            a multiple of ``bs``.
    """
    assert self.chunk is not None
    assert len(self.chunk) % bs == 0
    # Integer division: avoids the float round-trip of int(len / bs).
    cblocks = len(self.chunk) // bs
    unpacked = struct.unpack("<" + "BQQ" * cblocks, self.chunk)
    for i in range(cblocks):
        e = Entry(unpacked[i * 3:(i + 1) * 3])
        self.q.put_nowait(e)
        if debuglevel(4):
            # One-element tuple so "%s" formatting is safe for any Entry.
            self.debug(4, "parsing %s", (e,))
        if Type.isbranch(e) and e.data != 0:
            # Report conditional branches/call/ret twice:
            # once as original branch/call/ret at the branch point
            # and once as BBL at the branch target
            e2 = Entry([Type.FUNC_BBL.value, e.data, 0])
            self.q.put_nowait(e2)
            if debuglevel(4):
                self.debug(4, "Is branch, creating %s", (e2,))
    self.chunk = None
def get(self):
    """Remove and return the next relevant entry from the queue.

    Every dequeued entry is first fed to ``self.callstack.update_context``.
    Entries of type ``HALLOC`` or ``HFREE`` are then skipped, so they are
    never returned to the caller.

    Returns:
        The next entry whose type is neither ``HALLOC`` nor ``HFREE``, or
        ``None`` when the queue is empty and ``self.refill()`` fails.
    """
    while True:
        if self.q.empty() and not self.refill():
            return None
        item = self.q.get_nowait()
        self.callstack.update_context(item)
        if Type(item.type) not in (Type.HALLOC, Type.HFREE):
            break
    if debuglevel(4):
        self.debug(4, str(item))
    return item
def merge_leaks_recursive(B, callstack):
    """Report all leaks stored in ``B`` and its descendants to ``leaks``.

    Data leaks are reported first, then control-flow leaks; each is
    deep-copied before being handed to ``leaks.report_leak``. For every
    child of ``B`` the child's context is entered on ``callstack``, the
    child is merged recursively, and the context is left again.

    Args:
        B: Node providing ``dataleaks``, ``cfleaks`` and ``children``.
        callstack: Call stack describing the position of ``B``; it is
            modified during the traversal and restored for each child.
    """
    if debuglevel(3):
        callstack.doprint_reverse()
    for stored in B.dataleaks:
        leaks.report_leak(callstack, copy.deepcopy(stored))
    for stored in B.cfleaks:
        leaks.report_leak(callstack, copy.deepcopy(stored))
    for node in B.children.values():
        callstack.docall_context(node.ctxt)
        merge_leaks_recursive(node, callstack)
        callstack.doreturn_context()
def has_leak(self, callstack, leak):
    """Check whether ``leak`` is recorded under the given calling context.

    The first element of ``callstack`` selects the child to descend into;
    the remainder is handed to that child recursively. Once the call stack
    is exhausted, the leak is looked up in ``self.dataleaks`` or
    ``self.cfleaks`` depending on its class.

    Args:
        callstack: Sequence of ``Context`` objects, outermost first, or
            ``None`` / empty to query this node directly.
        leak: A ``DataLeak`` or ``CFLeak`` instance.

    Returns:
        ``True`` if the leak is recorded at the addressed node, ``False``
        if it is not or if the calling context does not exist.

    Raises:
        AssertionError: If ``leak`` is neither a ``DataLeak`` nor a
            ``CFLeak``, or a call stack element is not a ``Context``.
    """
    if callstack is None or len(callstack) == 0:
        if isinstance(leak, DataLeak):
            return leak in self.dataleaks
        if isinstance(leak, CFLeak):
            return leak in self.cfleaks
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError("Unsupported leak type: %r" % (type(leak),))

    # advance to correct calling context recursively
    # by consuming first callstack entry
    ctxt = callstack[0]
    assert isinstance(ctxt, Context)
    if debuglevel(3):
        debug(3, "Processing callstack:")
        for ci in callstack:
            debug(3, "%08x--%08x", (ci.caller, ci.callee))
    if ctxt in self.children:
        return self.children[ctxt].has_leak(callstack[1:], leak)
    # Unknown calling context: nothing was recorded there. Return an
    # explicit bool instead of falling off the end with None.
    return False
def report_leak(self, callstack, leak, nocreate=False):
    """Store ``leak`` at the node addressed by ``callstack``.

    With an empty (or ``None``) call stack the leak is consumed by this
    node via ``self.consume_leak``. Otherwise the first call stack element
    selects the child that receives the leak together with the remaining
    call stack.

    Args:
        callstack: Sequence of ``Context`` objects, outermost first, or
            ``None`` / empty to store the leak in this node.
        leak: The leak object to store.
        nocreate: If true, the addressed child must already exist (checked
            with an assertion); if false, a missing child is created as
            ``CallHistory(ctxt, self)``.
    """
    if callstack is None or len(callstack) == 0:
        self.consume_leak(leak)
        return

    # Descend one level: the head of the call stack names the child.
    head = callstack[0]
    assert isinstance(head, Context)
    if debuglevel(5):
        debug(5, "Processing callstack")
        for frame in callstack:
            debug(5, "%08x--%08x", (frame.caller, frame.callee))
        debug(5, "Handling ctxt %08x--%08x", (head.caller, head.callee))
    if nocreate:
        assert head in self.children
    elif head not in self.children:
        self.children[head] = CallHistory(head, self)
    self.children[head].report_leak(callstack[1:], leak, nocreate)
def debug(self, level, fstr, values=()):
    """Emit a debug message prefixed with this object's ``id``.

    The message is only formatted when ``debuglevel(level)`` is true. It
    is then forwarded to the module-level ``debug`` as ``"[<id>]<text>"``.

    Args:
        level: Debug level of the message.
        fstr: printf-style format string.
        values: Values applied to ``fstr`` with the ``%`` operator.
    """
    if not debuglevel(level):
        return
    rendered = str(fstr % values)
    debug(level, "[%d]%s", (self.id, rendered))
def iterate_queue(files, fast=True):
    """Compare two traces entry by entry and report every difference.

    One ``TraceQueue`` is created per element of ``files`` and stored in
    the module-level ``queues``; only the first two queues are compared.
    Identical entries are skipped while the most recent branch is
    remembered. For differing entries:

    * same type and ip, branch: control-flow leak. Both traces are
      advanced to a common merge point found via ``Lookahead`` and the
      leak is reported with ``report_cfleak``. If no merge point exists,
      the leak is reported with a placeholder merge point and the function
      returns.
    * same type and ip, memory access: data leak, reported with
      ``report_dataleak`` when the (heap-masked) data values differ.
    * otherwise, both memory accesses: reported as data leak after the
      data of heap accesses has been reduced to its low 32 bits.

    Args:
        files: Sequence of trace file paths; at least two are required.
        fast: If true, the next pair of entries is obtained from
            ``fast_forward``; otherwise one entry is fetched from each
            queue with ``get()``.

    Raises:
        AssertionError: If the traces violate an expected invariant
            (unknown entry type, missed branch, diverging call stacks).
        IndexError: If ``files`` has fewer than two elements.
    """
    global queues
    bp = None
    bdepth = -1
    queues = [TraceQueue(path, i) for i, path in enumerate(files)]
    queues[0].id = 0
    queues[1].id = 1
    while True:
        if fast:
            [e1, e2, bp, bdepth] = fast_forward(queues, bp, bdepth)
        else:
            e1 = queues[0].get()
            e2 = queues[1].get()
        if e1 is None or e2 is None:
            if e1 is not None or e2 is not None:
                debug(0, "Unbalanced end of trace")
            break
        if e1 == e2:
            # no diff
            assert queues[0].callstack == queues[1].callstack
            if Type.isbranch(e1):
                bp = e1
                bdepth = queues[0].callstack.depth()
            continue

        assert bp is None or Type.isbranch(bp)
        if e1.type == e2.type and e1.ip == e2.ip:
            if Type.isbranch(e1):
                bp = e1
                # Snapshot: the queues' own call stacks change while
                # advancing to the merge point below.
                bcallstack = copy.copy(queues[0].callstack)
                bdepth = bcallstack.depth()
                # We have a control flow leak
                debug(1, "CF Leak @ %08x, depth %d", (bp.ip, bdepth))
                if debuglevel(3):
                    queues[0].callstack.doprint_reverse()
                lhA = Lookahead(queues[0])
                lhB = Lookahead(queues[1])
                # Get 2 branches
                e1b = queues[0].lookahead(0)
                e2b = queues[1].lookahead(0)
                foundA = True
                foundB = True
                while True:
                    if foundA:
                        foundA = lhA.advance_next_bp_candidate(bdepth)
                    if foundB:
                        foundB = lhB.advance_next_bp_candidate(bdepth)
                    mergepoint = Lookahead.intersect(lhA, lhB)
                    if mergepoint is not None:
                        break
                    if not foundA and not foundB:
                        debug(0, "No mergepoint found!")
                        report_cfleak(queues[0].callstack, bp.ip, MergePoint(Type.FUNC_EXIT, 0, 0), e1b, -1, e2b, -1)
                        return
                assert isinstance(mergepoint, MergePoint)
                debug(2, "found mp: %08x, depth %d", (mergepoint.ip, mergepoint.depth))
                # Advance to mergepoint
                debug(2, "advancing to mp:")
                if debuglevel(3):
                    queues[0].callstack.doprint_reverse()
                len1 = queues[0].advance(mergepoint)
                len2 = queues[1].advance(mergepoint)
                debug(2, "advanced to mp: %08x,%08x", (queues[0].lookahead(0).ip, queues[1].lookahead(0).ip))
                if debuglevel(3):
                    queues[0].callstack.doprint_reverse()
                assert queues[0].lookahead(0).ip == queues[1].lookahead(0).ip
                # Print both call stacks before failing so a mismatch can
                # be diagnosed.
                if not queues[0].callstack == queues[1].callstack:
                    queues[0].callstack.doprint_reverse()
                    print("====")
                    queues[1].callstack.doprint_reverse()
                    raise AssertionError("Call stacks differ at merge point")
                assert Type.isbranch(bp)
                report_cfleak(bcallstack, bp.ip, mergepoint, e1b, len1, e2b, len2)
            elif Type(e1.type) in (Type.READ, Type.WRITE, Type.HREAD, Type.HWRITE):
                # We have a dataleak
                assert e1.data != 0
                assert e2.data != 0
                assert queues[0].callstack == queues[1].callstack
                if Type(e1.type) in (Type.HREAD, Type.HWRITE):
                    e1.data &= 0x00000000FFFFFFFF
                    e2.data &= 0x00000000FFFFFFFF
                # Heap entries may become equal once masked.
                if e1.data != e2.data:
                    report_dataleak(queues[0].callstack, e1, e2)
            else:
                debug(0, "Unknown type")
                # Explicit raise: ``assert False`` vanishes under -O.
                raise AssertionError("Unknown type")
        elif Type(e1.type) in (Type.READ, Type.WRITE, Type.HREAD, Type.HWRITE):
            if Type(e2.type) in (Type.READ, Type.WRITE, Type.HREAD, Type.HWRITE):
                # Mixture of heap and non-heap read/write. Maybe, heap tracking is imprecise
                # We require that both elements are either (h)read or (h)write
                assert (e1.type | Type.MASK_HEAP.value) == (e2.type | Type.MASK_HEAP.value)
                # Test the heap bit with '&'. The previous '|' was true
                # for every type, so non-heap data was masked as well.
                if (e1.type & Type.MASK_HEAP.value) > 0:
                    e1.data &= 0x00000000FFFFFFFF
                if (e2.type & Type.MASK_HEAP.value) > 0:
                    e2.data &= 0x00000000FFFFFFFF
                report_dataleak(queues[0].callstack, e1, e2)
            else:
                # This should never happen. We miss some conditional branches in the code
                debug(0, "Missed some branch (inner miss)")
                raise AssertionError("Missed some branch (inner miss)")
        else:
            # This should never happen. We miss some conditional branches in the code
            debug(0, "Missed some branch (outer miss)")
            raise AssertionError("Missed some branch (outer miss)")