def add_call_node(self, vtx, parent, econd):
    """When a (parent) block performs a call, the (vtx) targeted block
    will not be linked with its parent but rather will possibly start a
    new connected component of the cfg. When the component is declared
    as a function, the parent block is linked to a new node that embeds
    the function instead.
    """
    b = vtx.data
    callers = b.misc['callers']
    if callers:
        if parent in callers:
            # parent already calls this block: return the existing
            # successor of parent with a matching address, or None
            # if no such successor exists.
            for n in parent.N(+1):
                if vtx.data.address == n.data.address:
                    return n
            return None
        callers.append(parent)
    else:
        logger.verbose('block %s starts a new cfg component' % vtx.name)
        b.misc['callers'] = [parent]
    # tag the target as a function entry and the parent as a caller:
    b.misc[code.tag.FUNC_START] += 1
    parent.data.misc[code.tag.FUNC_CALL] += 1
    if b.misc['func']:
        # the component is already known as a function: link the parent
        # to a node that embeds the function object instead of vtx.
        logger.verbose('function %s called' % b.misc['func'])
        vtx = cfg.node(b.misc['func'])
        e = parent.c.add_edge(cfg.link(parent, vtx, data=econd))
        vtx = e.v[1]
    else:
        # otherwise start a new connected component rooted at vtx:
        vtx = self.G.add_vertex(vtx)
    return vtx
def check_ext_target(self, t):
    """check if the target is the address of an external function.
    If True, a :class:`code.xfunc` node is linked to the parent and
    the spool is updated with this node.

    Returns:
        `True` if target is external, `False` otherwise.
    """
    if t.cst is None:
        return False
    if t.cst._is_ext:
        # wrap the external symbol in an xfunc stub block and link it:
        b = code.xfunc(t.cst)
        vtx = cfg.node(b)
        e = cfg.link(t.parent, vtx, data=t.econd)
        e = t.parent.c.add_edge(e)
        self.update_spool(e.v[1], t.parent)
        return True
    return False
def get_targets(self, node, parent):
    """Computes expression of target address in the given node, based on
    backward evaluation of all *first-parent* symbolic maps, until the
    program counter (PC) expression is a constant or the function entry
    block is reached.

    Arguments:
        node: the current node, not yet added to the cfg.
        parent: the parent node in the cfg that has targeted the
            current node.

    Returns:
        :class:`_target`: the PC expression evaluated from composition
        of *first-parent-path* symbolic maps.
    """
    pc = self.prog.cpu.PC()
    n = node
    mpc = pc
    # walk the first-parent chain backwards, composing each block's
    # symbolic map into mpc until concrete targets can be extracted:
    while True:
        m = n.data.map.use((pc, n.data.address))
        mpc = m(mpc)
        T = _target(mpc, node).expand()
        if len(T) > 0:
            return T
        try:
            n = n.N(-1)[0]  # get first parent node (parent arg is unused)
        except IndexError:
            break  # we are at function entry node
    # create func nodes:
    xpc = []
    if n.data.misc[code.tag.FUNC_START]:
        if node.data.misc[code.tag.FUNC_END]:
            n.data.misc[code.tag.FUNC_START] += 1
        # derive the function name from its first caller's 'to' symbol:
        try:
            fsym = n.data.misc['callers'][0].data.misc['to'].ref
        except (IndexError, TypeError, AttributeError):
            fsym = 'f'
        func = code.func(n.c, name="%s:%s" % (fsym, n.name))
        logger.verbose("function %s created" % func)
        if mpc._is_mem and len(mpc.mods) > 0:
            # pc depends on memory writes that may alias the frame:
            pol = '(assume_no_aliasing)' if self.policy[
                'frame-aliasing'] == False else ''
            logger.verbose("pc is memory aliased in %s %s" % (str(func), pol))
            if self.policy['frame-aliasing'] == False:
                mpc.mods = []
        func.map[pc] = mpc
        # propagate mpc through every caller's map to get the return
        # targets, linking each caller to a func-embedding node:
        for cn in n.data.misc['callers']:
            cnpc = cn.data.map.use((pc, cn.data.address))(mpc)
            f = cfg.node(func)
            e = cn.c.add_edge(cfg.link(cn, f))
            xpc.extend(_target(cnpc, e.v[1]).expand())
        n.data.misc['func'] = func
    else:
        xpc.extend(_target(mpc, node).expand())
    return xpc
def itercfg(self, loc=None):
    """A generic *forward* analysis explorer. The default policy is
    *depth-first* search (use policy=0 for breadth-first search.)
    The ret instructions are not followed (see lbackward analysis).

    Arguments:
        loc (Optional[cst]): the address to start the cfg recovery
            (defaults to the program's entrypoint).

    Yields:
        :class:`cfg.node`: every nodes added to the graph.
    """
    G = self.G
    # spool is the list of targets (target_ instances) to be analysed
    self.init_spool(loc)
    # order is the index to pop elements from spool
    order = -1 if self.policy['depth-first'] else 0
    # lazy is a flag to fallback to linear sweep
    lazy = self.policy['branch-lazy']
    # proceed with exploration of every spool element:
    while len(self.spool) > 0:
        t = self.spool.pop(order)
        parent = t.parent
        econd = t.econd
        if self.check_ext_target(t):
            continue
        for b in self.iterblocks(loc=t.cst):
            vtx = G.get_by_name(b.name) or cfg.node(b)
            do_update = (vtx not in G)
            # if block is a FUNC_START, we add it as a new graph component (no link to parent),
            # otherwise we add the new (parent,vtx) edge.
            if parent is None:
                self.add_root_node(vtx)
            elif parent.data.misc[code.tag.FUNC_CALL]:
                vtx = self.add_call_node(vtx, parent, econd)
            else:
                if parent.data.misc['cut']:
                    # parent block was cut: its fallthrough is stale
                    continue
                e_ = cfg.link(parent, vtx, data=econd)
                e = G.add_edge(e_)
                if e is e_:
                    # add_edge returned our edge: it is new in G
                    logger.verbose(u'edge %s added' % e)
            # now we try to populate spool with target addresses of current block:
            if do_update:
                self.update_spool(vtx, parent)
            self.check_func(vtx)
            yield vtx
            if (not do_update or not lazy or vtx.data.misc[code.tag.FUNC_END]):
                break
            # lazy mode: fall through to the next sequential block
            logger.verbose(u"lsweep fallback at %s" % vtx.data.name)
            parent = vtx
            econd = None
def getcfg(self, loc=None):
    """Linear-sweep recovery: groups consecutive blocks into
    :class:`cfg.func` objects delimited by FUNC_START/FUNC_END tags and
    returns the list of functions (plus orphan blocks, see below).
    """
    F = []
    for b in self.iterblocks(loc):
        if b.misc[code.tag.FUNC_START]:
            # a new function starts at this block:
            f = cfg.func()
        if b.misc[code.tag.FUNC_END]:
            # NOTE(review): if FUNC_END is seen before any FUNC_START,
            # this raises an uncaught NameError — presumably blocks are
            # always tagged consistently; confirm against iterblocks.
            F.append(f)
        try:
            f.add_vertex(cfg.node(b))
        except NameError:
            # 'f' not defined yet: b appears before any FUNC_START tag
            # and belongs to no function; keep it as an orphan block.
            logger.warning('linear sweep orfan block %s' % b.name)
            F.append(b)
    return F
def check_func(self, node):
    """Check if vtx node creates a function: when no pending spool
    target still belongs to node's component, the component is
    considered fully explored and promoted to a :class:`code.func`
    whose map is computed and propagated to its callers.
    (In the fforward method this method does nothing.)
    """
    if node is None:
        return
    for t in self.spool:
        if t.parent in node.c:
            # a target still lives in this component: not done yet
            return
    # create func object:
    f = code.func(node.c)
    # temporarily install the aliasing policy and complexity threshold:
    alf = code.mapper.assume_no_aliasing
    code.mapper.assume_no_aliasing = not self.policy['frame-aliasing']
    cxl = code.op.threshold()
    code.op.limit(self.policy['complexity'])
    SIG_FUNC.emit(args=f)
    m = f.makemap()
    # get pc @ node:
    pc = self.prog.cpu.PC()
    mpc = m(pc)
    T = _target(mpc, node).expand()
    # if a target is defined here, it means that func cfg is not completed
    # so we can return now :
    if len(T) > 0:
        logger.verbose('extending cfg of %s (new target found)' % f)
        for t in T:
            # re-anchor each new target on the head that produced it:
            for k, v in f.misc['heads'].items():
                if v(pc) == t.cst:
                    t.parent = k
    else:
        logger.info('lbackward: function %s done' % f)
        f.map = m
        #self.prog.codehelper(func=f)
        mpc = f.map(pc)
        roots = f.view.layout.layers[0]
        assert len(roots) > 0
        nroot = roots[0]
        nroot.data.misc['func'] = f
        # derive the function name from its first caller's 'to' symbol:
        try:
            fsym = nroot.data.misc['callers'][0].data.misc['to'].ref
        except (IndexError, TypeError, AttributeError):
            fsym = 'f'
        f.name = "%s:%s" % (fsym, nroot.name)
        self.prog.codehelper(func=f)
        # link every caller to a func-embedding node and collect the
        # return targets obtained through each caller's map:
        for cn in nroot.data.misc['callers']:
            cnpc = cn.data.map(mpc)
            fn = cfg.node(f)
            e = cn.c.add_edge(cfg.link(cn, fn))
            logger.verbose('edge %s added' % str(e))
            T.extend(_target(cnpc, e.v[1]).expand())
    # restore global settings and queue the new targets:
    code.mapper.assume_no_aliasing = alf
    code.op.limit(cxl)
    self.spool.extend(T)
def getcfg(self, loc=None):
    """Fast-forward recovery: follows only targets computed from each
    block's own map (no per-path composition) and collects all nodes
    into a single :class:`cfg.func` which is returned.
    """
    spool = self.init_spool(loc)
    order = -1 if self.policy['depth-first'] else 0
    lazy = self.policy['branch-lazy']
    F = cfg.func()
    pc = self.prog.PC()
    while len(spool) > 0:
        current, parent = spool.pop(order)
        for b in self.iterblocks(loc=current):
            err = '%s analysis failed at block %s' % (self.__class__.__name__, b.name)
            sta, sto = b.support
            vtx = cfg.node(b)
            if vtx in F.V():
                break  # block already recovered: stop this sweep
            if parent is None or (parent.data.address is None):
                # no valid parent: vtx is a root node
                b.misc[code.tag.FUNC_START] = 1
                F.add_vertex(vtx)
                logger.verbose('root node %s added' % vtx.name)
            else:
                if b.misc[code.tag.FUNC_START] and parent.data.misc[code.tag.FUNC_CALL]:
                    # called function entry: new component, no edge
                    b.misc[code.tag.FUNC_START] += 1
                    F.add_vertex(vtx)
                    logger.verbose('function node %s added' % vtx.name)
                else:
                    e = cfg.link(parent, vtx)
                    F.add_edge(e)
                    logger.verbose('edge %s added' % e)
            # continue and update spool...
            target = self.get_target(b, withmap=parent)
            parent = vtx
            if target == sto:
                # fallthrough to the next block: keep sweeping linearly
                continue
            elif target._is_cst:
                spool.append((target, parent))
                if not lazy:
                    break
            elif target._is_tst:
                # conditional branch: queue each side if it is constant
                t1 = target.l
                t2 = target.r
                if t1._is_cst:
                    spool.append((t1, parent))
                else:
                    logger.info(err + ' (true branch)')
                if t2._is_cst:
                    spool.append((t2, parent))
                else:
                    logger.info(err + ' (false branch)')
                break
            else:
                logger.info(err)
                if not lazy:
                    break
    return F
def check_ext_target(self, t):
    """check if the target is the address of an external function.
    If True, the :class:`code.xfunc` node is linked to the parent and
    the spool is updated with this node.

    Returns:
        `True` if target is external, `False` otherwise.
    """
    if t.cst is None:
        return False
    if not t.cst._is_ext:
        return False
    # external target: materialize an xfunc stub node, link it to the
    # parent, and register the new node for further exploration.
    stub = cfg.node(code.xfunc(t.cst))
    edge = t.parent.c.add_edge(cfg.link(t.parent, stub, data=t.econd))
    self.update_spool(edge.v[1], t.parent)
    self.check_func(edge.v[1])
    return True
def get_targets(self, node, parent):
    """Computes expression of target address in the given node, based on
    backward evaluation of all *first-parent* symbolic maps, until the
    program counter (PC) expression is a constant or the function entry
    block is reached.

    Arguments:
        node: the current node, not yet added to the cfg.
        parent: the parent node in the cfg that has targeted the
            current node.

    Returns:
        :class:`_target`: the PC expression evaluated from composition
        of *first-parent-path* symbolic maps.
    """
    pc = self.prog.cpu.PC()
    n = node
    mpc = pc
    # walk the first-parent chain backwards, composing each block's
    # symbolic map into mpc until concrete targets can be extracted:
    while True:
        m = n.data.map.use((pc, n.data.address))
        mpc = m(mpc)
        T = _target(mpc, node).expand()
        if len(T) > 0:
            return T
        try:
            n = n.N(-1)[0]  # get first parent node (parent arg is unused)
        except IndexError:
            break  # we are at function entry node
    # create func nodes:
    xpc = []
    if n.data.misc[code.tag.FUNC_START]:
        if node.data.misc[code.tag.FUNC_END]:
            n.data.misc[code.tag.FUNC_START] += 1
        # derive the function name from its first caller's 'to' symbol:
        try:
            fsym = n.data.misc['callers'][0].data.misc['to'].ref
        except (IndexError, TypeError, AttributeError):
            fsym = 'f'
        func = code.func(n.c, name="%s:%s" % (fsym, n.name))
        logger.verbose("function %s created" % func)
        if mpc._is_mem and len(mpc.mods) > 0:
            # pc depends on memory writes that may alias the frame:
            pol = '(assume_no_aliasing)' if self.policy[
                'frame-aliasing'] == False else ''
            logger.verbose("pc is memory aliased in %s %s" % (str(func), pol))
            if self.policy['frame-aliasing'] == False:
                mpc.mods = []
        func.map[pc] = mpc
        # propagate mpc through every caller's map to get the return
        # targets, linking each caller to a func-embedding node:
        for cn in n.data.misc['callers']:
            cnpc = cn.data.map.use((pc, cn.data.address))(mpc)
            f = cfg.node(func)
            e = cn.c.add_edge(cfg.link(cn, f))
            xpc.extend(_target(cnpc, e.v[1]).expand())
        n.data.misc['func'] = func
    else:
        xpc.extend(_target(mpc, node).expand())
    return xpc
def itercfg(self, loc=None): G = self.G # spool is the list of (target,parent) addresses to be analysed self.init_spool(loc) # order is the index to pop elements from spool order = -1 if self.policy['depth-first'] else 0 # lazy is a flag to fallback to linear sweep lazy = self.policy['branch-lazy'] # proceed with exploration of every spool element: while len(self.spool) > 0: t = self.spool.pop(order) parent = t.parent econd = t.econd if self.check_ext_target(t): continue for b in self.iterblocks(loc=t.cst): vtx = G.get_by_name(b.name) or cfg.node(b) do_update = (vtx not in G) # if block is a FUNC_START, we add it as a new graph component (no link to parent), # otherwise we add the new (parent,vtx) edge. if parent is None: self.add_root_node(vtx) elif parent.data.misc[code.tag.FUNC_CALL] > 0: vtx = self.add_call_node(vtx, parent, econd) else: e_ = cfg.link(parent, vtx, data=econd) e = G.add_edge(e_) if e is e_: logger.verbose('edge %s added' % e) # now we try to populate spool with target addresses of current block: if do_update: self.update_spool(vtx, parent) self.check_func(vtx) yield vtx if (not do_update or not lazy or vtx.data.misc[code.tag.FUNC_END]): break logger.verbose("lsweep fallback at %s" % vtx.data.name) parent = vtx econd = None
def add_call_node(self, vtx, parent, econd):
    """When a (parent) block performs a call, the (vtx) targeted block
    is not linked with its parent but possibly starts a new connected
    component of the cfg; when the component is already declared as a
    function, the parent is linked to a func-embedding node instead.
    """
    b = vtx.data
    callers = b.misc['callers']
    if callers:
        if parent in callers:
            # parent already calls this block: return the existing
            # successor of parent with a matching address, or None
            # if no such successor exists.
            for n in parent.N(+1):
                if vtx.data.address == n.data.address:
                    return n
            return None
        callers.append(parent)
    else:
        logger.verbose('block %s starts a new cfg component' % vtx.name)
        b.misc['callers'] = [parent]
    # tag the target as a function entry and the parent as a caller:
    b.misc[code.tag.FUNC_START] += 1
    parent.data.misc[code.tag.FUNC_CALL] += 1
    if b.misc['func']:
        # the component is already known as a function: link the parent
        # to a node that embeds the function object instead of vtx.
        logger.verbose('function %s called' % b.misc['func'])
        vtx = cfg.node(b.misc['func'])
        e = parent.c.add_edge(cfg.link(parent, vtx, data=econd))
        vtx = e.v[1]
    else:
        # otherwise start a new connected component rooted at vtx:
        vtx = self.G.add_vertex(vtx)
    return vtx
def get_targets(self, node, parent):
    """Computes expression of target address in the given node, based on
    backward evaluation of all *first-parent* symbolic maps, until the
    program counter (PC) expression is a constant or the function entry
    block is reached.

    Arguments:
        node: the current node, not yet added to the cfg.
        parent: the parent node in the cfg that has targeted the
            current node.

    Returns:
        :class:`_target`: the PC expression evaluated from composition
        of *first-parent-path* symbolic maps.
    """
    pc = self.prog.cpu.PC()
    n = node
    mpc = pc
    # walk the first-parent chain backwards, composing each block's
    # symbolic map into mpc until concrete targets can be extracted:
    while True:
        m = n.data.map.use((pc, n.data.address))
        mpc = m(mpc)
        T = _target(mpc, node).expand()
        if len(T) > 0:
            return T
        try:
            n = n.N(-1)[0]  # get first parent node (parent arg is unused)
        except IndexError:
            break  # we are at function entry node
    # create func nodes:
    xpc = []
    if n.data.misc[code.tag.FUNC_START]:
        if node.data.misc[code.tag.FUNC_END]:
            n.data.misc[code.tag.FUNC_START] += 1
        # derive the function name from its first caller's 'to' symbol:
        try:
            fsym = n.data.misc['callers'][0].data.misc['to'].ref
        except (IndexError, TypeError, AttributeError):
            fsym = 'f'
        func = code.func(n.c, name="%s:%s" % (fsym, n.name))
        logger.verbose("function %s created" % func)
        if mpc._is_mem and len(mpc.mods) > 0:
            # pc depends on memory writes that may alias the frame:
            pol = '(assume_no_aliasing)' if self.policy['frame-aliasing'] == False else ''
            logger.verbose("pc is memory aliased in %s %s" % (str(func), pol))
            if self.policy['frame-aliasing'] == False:
                mpc.mods = []
        func.map[pc] = mpc
        # propagate mpc through every caller's map to get the return
        # targets, linking each caller to a func-embedding node:
        for cn in n.data.misc['callers']:
            cnpc = cn.data.map.use((pc, cn.data.address))(mpc)
            f = cfg.node(func)
            e = cn.c.add_edge(cfg.link(cn, f))
            xpc.extend(_target(cnpc, e.v[1]).expand())
        n.data.misc['func'] = func
    else:
        xpc.extend(_target(mpc, node).expand())
    return xpc
def itercfg(self,loc=None): G = self.G # spool is the list of (target,parent) addresses to be analysed self.init_spool(loc) # order is the index to pop elements from spool order = -1 if self.policy['depth-first'] else 0 # lazy is a flag to fallback to linear sweep lazy = self.policy['branch-lazy'] # proceed with exploration of every spool element: while len(self.spool)>0: t = self.spool.pop(order) if t.dirty: continue parent = t.parent econd = t.econd if self.check_ext_target(t): continue for b in self.iterblocks(loc=t.cst): vtx = G.get_by_name(b.name) or cfg.node(b) b = vtx.data # if block is a FUNC_START, we add it as a new graph component (no link to parent), # otherwise we add the new (parent,vtx) edge. if parent is None: self.add_root_node(vtx) elif parent.data.misc[code.tag.FUNC_CALL]: vtx = self.add_call_node(vtx,parent,econd) else: e = cfg.link(parent,vtx,data=econd) e = G.add_edge(e) if e is not None: logger.verbose('edge %s added'%e) # now we try to populate spool with target addresses of current block: self.update_spool(vtx,parent) yield vtx if not lazy or b.misc[code.tag.FUNC_END]: break logger.verbose("lsweep fallback at %s"%b.name) parent = vtx econd = None
def build(self, cpu):
    """Rebuild a graph for the given cpu: every stored block is
    re-assembled into a node and the recorded (n1, n2) name pairs
    are turned into edges.
    """
    g = graph()
    index = {b.name: node(b.build(cpu)) for b in self.nodes}
    for src, dst in self.links:
        g.add_edge(link(index[src], index[dst]))
    return g
def build(self, cpu):
    """Materialize the serialized cfg: blocks are rebuilt for the
    given cpu and linked according to self.links (pairs of names).
    """
    g = graph()
    by_name = dict((b.name, node(b.build(cpu))) for b in self.nodes)
    edges = [link(by_name[a], by_name[b]) for (a, b) in self.links]
    for e in edges:
        g.add_edge(e)
    return g
def getcfg(self, loc=None):
    """Basic cfg recovery: assumes that calls always return to the
    following block, and links blocks based on their recorded concrete
    targets only (no symbolic map is computed).
    It is *fast* but probably very wrong...
    """
    from collections import OrderedDict, defaultdict

    def isset(v):
        # misc[] entries default to the int 0 when never assigned; the
        # previous code tested this with "v is not 0", which relies on
        # CPython small-int caching and raises a SyntaxWarning on
        # Python >= 3.8. This is the equivalent well-defined test.
        return not (isinstance(v, int) and v == 0)

    D = OrderedDict()
    C = defaultdict(lambda: [])
    # collect an over-approximation of all blocks, indexed by address:
    for b in self.iterblocks(loc):
        n = cfg.node(b)
        D[n.data.address] = n
    # now we have collected an overapprox. of all blocks,
    # lets link those "super-blocks" together:
    while len(D) > 0:
        k, n = D.popitem(last=False)
        # add node (does nothing if n is already in G)
        n = self.G.add_vertex(n)
        b = n.data
        # find its links:
        if b.misc['func_call']:
            # assume the call returns to the recorded 'retto' address:
            aret = b.misc['retto'] + 0
            nret = D.get(aret, None) or self.G.get_with_address(aret)
            if nret is not None:
                e = cfg.link(n, nret)
                self.G.add_edge(e)
                ato = b.misc['to']
                if isset(ato):
                    # defer the call-target link to the cut phase:
                    C[ato + 0].append((e, 'func_call'))
        elif b.misc['func_goto'] and isset(b.misc['to']):
            ato = b.misc['to'] + 0
            nto = D.get(ato, None) or self.G.get_with_address(ato)
            if nto is not None:
                e = cfg.link(n, nto)
                self.G.add_edge(e)
            else:
                C[ato].append((n, 'to'))
        if b.misc['cond']:
            # conditional fallthrough edge to the next address:
            ato = n.data.support[1]
            nto = D.get(ato, None) or self.G.get_with_address(ato)
            if nto is not None:
                e = cfg.link(n, nto, data=b.misc['cond'][1])
                self.G.add_edge(e)
            else:
                C[ato].append((n, b.misc['cond']))
    # now all super-blocks have been processed, but some may need
    # to be cut, lets handle those missed targets:
    while len(C) > 0:
        ato, L = C.popitem()
        n = cfg.node(next(self.iterblocks(ato)))
        n = self.G.add_vertex(n)
        for (p, why) in L:
            # 'why' is either 'func_call', 'to', or a cond pair;
            # equality replaces the previous interning-dependent
            # "why is 'func_call'" identity test.
            if why == 'func_call':
                if n.data.misc['callers']:
                    n.data.misc['callers'].append(p)
                else:
                    n.data.misc['callers'] = [p]
            else:
                e = cfg.link(p, n)
                self.G.add_edge(e)
    # finally create func objects and insert nodes:
    for n in self.G.V():
        if n.data.misc['callers']:
            n.data.misc['func'] = f = code.func(n.c)
            calls = []
            for e in n.data.misc['callers']:
                # replace each caller->entry edge with caller->func
                # and func->return edges:
                fn = cfg.node(f)
                p = e.v[0]
                p.c.add_edge(cfg.link(p, fn))
                p.c.add_edge(cfg.link(fn, e.v[1]))
                p.c.remove_edge(e)
                calls.append(p)
            n.data.misc['callers'] = calls
    return self.G
def get_targets(self, node, parent):
    """Computes the target addresses by *backward* evaluation: the
    fforward strategy is tried first, and if it fails the function's
    map is (re)computed by backward analysis to resolve the pc
    expression; once the function is fully explored, pc is propagated
    through every caller's map to obtain the return targets.
    """
    pc = self.prog.cpu.PC()
    # install the aliasing policy; try/finally guarantees restoration
    # on every exit path (the previous code restored it manually
    # before each of its five returns):
    alf = code.mapper.assume_no_aliasing
    code.mapper.assume_no_aliasing = not self.policy['frame-aliasing']
    try:
        # try fforward first:
        T = fforward.get_targets(self, node, parent)
        if len(T) > 0:
            return T
        # create func object:
        f = code.func(node.c)
        m = f.backward(node)
        if m is None:
            logger.verbose('dead end at %s' % node.name)
        else:
            m = m.use((pc, f.address))
            # get pc @ node:
            mpc = m(pc)
            T = _target(mpc, node).expand()
            # if a target is defined here, it means that func cfg is not
            # completed so we can return now:
            if len(T) > 0:
                return T
        # otherwise if func cfg is complete compute pc out of function callers:
        xpc = []
        # check if a leaf is still going to be explored (hoisted out of
        # the loop instead of rebuilding a generator per leaf):
        pending = [s.parent for s in self.spool]
        for x in f.cfg.leaves():
            if x in pending:
                return xpc
        # f is now fully explored so we can "return" to callers:
        logger.info('lbackward: function %s done' % f)
        # cleanup spool:
        for t in self.spool:
            if t.parent.c is f.cfg:
                t.dirty = True
        # if needed compute the full map:
        if f.misc['partial']:
            m = f.makemap()
        f.map = m
        self.prog.codehelper(func=f)
        mpc = f.map(pc)
        # list comprehension instead of filter(): on Python 3 filter()
        # returns an iterator and len() below would raise TypeError.
        roots = [x for x in f.cfg.sV if x.data.misc[code.tag.FUNC_START]]
        if len(roots) <= 0:
            return xpc
        if len(roots) > 1:
            logger.verbose('lbackward: multiple entries into function %s ?!' % f)
        nroot = roots[0]
        nroot.data.misc['func'] = f
        # derive the function name from its first caller's 'to' symbol:
        try:
            fsym = nroot.data.misc['callers'][0].data.misc['to'].ref
        except (IndexError, TypeError, AttributeError):
            fsym = 'f'
        f.name = "%s:%s" % (fsym, nroot.name)
        # propagate pc through every caller's map to get the actual
        # return targets, linking each caller to a func-embedding node:
        for cn in nroot.data.misc['callers']:
            cnpc = cn.data.map.use((pc, cn.data.address))(mpc)
            fn = cfg.node(f)
            e = cn.c.add_edge(cfg.link(cn, fn))
            xpc.extend(_target(cnpc, e.v[1]).expand())
        return xpc
    finally:
        code.mapper.assume_no_aliasing = alf
def getcfg(self, loc=None):
    """the most basic cfg recovery method: it assumes that calls always
    return to the following block, and links blocks based on direct
    concrete targets without computing any symbolic map.
    Its *fast* but probably very wrong...
    """
    from collections import OrderedDict, defaultdict

    def isset(v):
        # misc[] entries default to the int 0 when never assigned; the
        # previous code tested this with "v is not 0", which relies on
        # CPython small-int caching and raises a SyntaxWarning on
        # Python >= 3.8. This is the equivalent well-defined test.
        return not (isinstance(v, int) and v == 0)

    D = OrderedDict()
    C = defaultdict(lambda: [])
    # collect an over-approximation of all blocks, indexed by address:
    for b in self.iterblocks(loc):
        n = cfg.node(b)
        D[n.data.address] = n
    # now we have collected an overapprox. of all blocks,
    # lets link those "super-blocks" together:
    while len(D) > 0:
        k, n = D.popitem(last=False)
        # add node (does nothing if n is already in G)
        n = self.G.add_vertex(n)
        b = n.data
        # find its links:
        if b.misc[code.tag.FUNC_CALL]:
            # assume the call returns to the recorded 'retto' address:
            aret = b.misc['retto'] + 0
            nret = D.get(aret, None) or self.G.get_with_address(aret)
            if nret is not None:
                e = cfg.link(n, nret)
                self.G.add_edge(e)
                ato = b.misc['to']
                if isset(ato) and b.misc[code.tag.FUNC_CALL] > 0:
                    # defer the call-target link to the cut phase:
                    C[ato + 0].append((e, code.tag.FUNC_CALL))
        elif b.misc[code.tag.FUNC_GOTO] and isset(b.misc['to']):
            ato = b.misc['to'] + 0
            nto = D.get(ato, None) or self.G.get_with_address(ato)
            if nto is not None:
                e = cfg.link(n, nto)
                self.G.add_edge(e)
            else:
                C[ato].append((n, 'to'))
        if b.misc['cond']:
            # conditional fallthrough edge to the next address:
            ato = n.data.support[1]
            nto = D.get(ato, None) or self.G.get_with_address(ato)
            if nto is not None:
                e = cfg.link(n, nto, data=b.misc['cond'][1])
                self.G.add_edge(e)
            else:
                C[ato].append((n, b.misc['cond']))
    # now all super-blocks have been processed, but some may need
    # to be cut, lets handle those missed targets:
    while len(C) > 0:
        ato, L = C.popitem()
        ib = self.iterblocks(ato)
        n = cfg.node(next(ib))
        ib.close()  # only the first block at ato is needed
        n = self.G.add_vertex(n)
        for (p, why) in L:
            # 'why' is either the FUNC_CALL tag, 'to', or a cond pair;
            # equality replaces the previous identity test.
            if why == code.tag.FUNC_CALL:
                if n.data.misc['callers']:
                    n.data.misc['callers'].append(p)
                else:
                    n.data.misc['callers'] = [p]
            else:
                e = cfg.link(p, n)
                self.G.add_edge(e)
    # finally create func objects and insert nodes:
    for n in self.G.V():
        if n.data.misc['callers']:
            n.data.misc['func'] = f = code.func(n.c)
            calls = []
            for e in n.data.misc['callers']:
                # replace each caller->entry edge with caller->func
                # and func->return edges:
                fn = cfg.node(f)
                p = e.v[0]
                p.c.add_edge(cfg.link(p, fn))
                p.c.add_edge(cfg.link(fn, e.v[1]))
                p.c.remove_edge(e)
                calls.append(p)
            n.data.misc['callers'] = calls
    return self.G
def init_spool(self, loc):
    # Seed the exploration spool with the starting address paired with
    # a dummy parent node made from an empty block.
    return [(loc, cfg.node(code.block([])))]