class basicblocs:
    """Container of blocs indexed by label, with their flow graph.

    self.blocs maps a bloc label to its bloc; self.g is a DiGraph whose
    nodes are bloc labels.
    """

    def __init__(self, ab=None):
        """Create the container and register every bloc of @ab
        @ab: (optional) iterable of blocs; no bloc is added when omitted
        """
        # None replaces the former shared mutable default ([])
        self.blocs = {}
        self.g = DiGraph()
        self.add_blocs([] if ab is None else ab)

    def add(self, b):
        """Register the bloc @b and link it to its destinations
        @b: bloc exposing .label and .bto
        """
        self.blocs[b.label] = b
        self.g.add_node(b.label)
        for dst in b.bto:
            # Only destinations whose label is an asm_label become edges
            if isinstance(dst.label, asm_label):
                self.g.add_edge(b.label, dst.label)

    def add_blocs(self, ab):
        """Register every bloc of the iterable @ab"""
        for b in ab:
            self.add(b)

    def get_bad_dst(self):
        """Return the set of blocs having at least one constraint of type
        asm_constraint.c_bad in their .bto"""
        return set(b for b in self.blocs.values()
                   if any(c.c_t == asm_constraint.c_bad for c in b.bto))
def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): for irblock in ir_arch.blocks.values(): print irblock ir_arch.dead_simp() irblock_0 = None for irblock in ir_arch.blocks.values(): if irblock.label.offset == ad: irblock_0 = irblock break assert (irblock_0 is not None) flow_graph = DiGraph() flow_graph.node2str = lambda n: node2str(flow_graph, n) for irblock in ir_arch.blocks.values(): block_flow_cb(ir_arch, flow_graph, irblock) for irblock in ir_arch.blocks.values(): print irblock print 'IN', [str(x) for x in irblock.in_nodes] print 'OUT', [str(x) for x in irblock.out_nodes] print '*' * 20, 'interblock', '*' * 20 inter_bloc_flow(ir_arch, flow_graph, irblock_0.label) # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.dot', 'w').write(flow_graph.dot())
def gen_bloc_data_flow_graph(ir_arch, ad, block_flow_cb): for irbloc in ir_arch.blocs.values(): print irbloc ir_arch.gen_graph() ir_arch.dead_simp() irbloc_0 = None for irbloc in ir_arch.blocs.values(): if irbloc.label.offset == ad: irbloc_0 = irbloc break assert(irbloc_0 is not None) flow_graph = DiGraph() flow_graph.node2str = lambda n: node2str(flow_graph, n) for irbloc in ir_arch.blocs.values(): block_flow_cb(ir_arch, flow_graph, irbloc) for irbloc in ir_arch.blocs.values(): print irbloc print 'IN', [str(x) for x in irbloc.in_nodes] print 'OUT', [str(x) for x in irbloc.out_nodes] print '*' * 20, 'interbloc', '*' * 20 inter_bloc_flow(ir_arch, flow_graph, irbloc_0.label) # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.dot', 'w').write(flow_graph.dot())
def as_graph(self, starting_nodes):
    """Merge the dependency graphs of every node of @starting_nodes into a
    single DiGraph, and return it
    @starting_nodes: set of DependencyNode instance
    """
    # One dependency subgraph per starting node, all built before merging
    per_node_graphs = [self._build_depGraph(start)
                       for start in starting_nodes]

    merged = DiGraph()
    for partial in per_node_graphs:
        for node in partial.nodes():
            merged.add_node(node)
        for link in partial.edges():
            merged.add_uniq_edge(*link)
    return merged
def as_graph(self):
    """Return a DiGraph built from self.links and self.pending"""
    dep_graph = DiGraph()

    # A link whose second member is falsy only declares a node
    for src, dst in self.links:
        if dst:
            dep_graph.add_edge(src, dst)
        else:
            dep_graph.add_node(src)

    # Pending relations: one edge per (parent, son) couple
    for father, children in self.pending.iteritems():
        for child in children:
            dep_graph.add_edge(father, child)

    return dep_graph
def gen_graph(self, link_all=True):
    """
    Build the irbloc DiGraph and store it in self.g
    @link_all: also gen edges to non present irblocs
    """
    flow = DiGraph()
    self.g = flow
    for src_lbl, irb in self.blocs.items():
        flow.add_node(src_lbl)
        for target in self.dst_trackback(irb):
            # Integer destinations are first turned into a label id
            if isinstance(target, ExprInt):
                offset = int(target.arg)
                target = ExprId(self.symbol_pool.getby_offset_create(offset))
            if not self.ExprIsLabel(target):
                continue
            if target.name in self.blocs or link_all is True:
                flow.add_edge(src_lbl, target.name)
def __init__(self, segments, abicls, machine):
    """Initialise an empty state
    @segments: kept as-is in self.segments
    @abicls: kept as-is in self.abicls
    @machine: argument given to Machine() to obtain the IR analyser
    """
    self.segments, self.abicls = segments, abicls

    # Registers
    self.input_reg, self.output_reg = {}, {}

    # Addresses tracking
    self._previous_addr = self._current_addr = 0
    self.paths = DiGraph()

    # Memory and references
    self.in_memory, self.out_memory = {}, {}
    self.refs = {}

    # Architecture dependent information
    # NOTE(review): sizeof_pointer() presumably returns bits, hence the / 8
    analyser = Machine(machine).ira()
    self._ira = analyser
    self._ptr_size = analyser.sizeof_pointer() / 8
    self.sp = analyser.sp.name
def __init__(self, abicls, machine):
    """Initialise an empty state
    @abicls: kept as-is in self.abicls
    @machine: argument given to Machine() to obtain the IR analyser
    """
    self.abicls = abicls

    # Registers
    self.input_reg, self.output_reg = {}, {}

    # Execution tracking
    self._previous_addr = self._current_addr = 0
    self._instr_count = 0
    self._pending_call = []
    # Function addr -> list of information on calls
    self.function_calls = {}
    self.paths = DiGraph()

    # Memory
    self.in_memory, self.out_memory = {}, {}

    # Architecture dependent information
    # NOTE(review): sizeof_pointer() presumably returns bits, hence the / 8
    analyser = Machine(machine).ira()
    self._ira = analyser
    self._ptr_size = analyser.sizeof_pointer() / 8
    self.sp = analyser.sp.name
def gen_bloc_data_flow_graph( ir_arch, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): out_str = "" # ir_arch = ir_x86_32(symbol_pool) for irbloc in ir_arch.blocs.values(): print irbloc ir_arch.gen_graph() ir_arch.dead_simp() irbloc_0 = None for irbloc in ir_arch.blocs.values(): if irbloc.label.offset == ad: irbloc_0 = irbloc break assert (irbloc_0 is not None) flow_graph = DiGraph() flow_graph.node2str = lambda n: node2str(flow_graph, n) done = set() todo = set([irbloc_0.label]) bloc2w = {} for irbloc in ir_arch.blocs.values(): intra_bloc_flow_raw(ir_arch, flow_graph, irbloc) # intra_bloc_flow_symb(ir_arch, flow_graph, irbloc) for irbloc in ir_arch.blocs.values(): print irbloc print 'IN', [str(x) for x in irbloc.in_nodes] print 'OUT', [str(x) for x in irbloc.out_nodes] print '*' * 20, 'interbloc', '*' * 20 inter_bloc_flow(ir_arch, flow_graph, irbloc_0.label) # sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core') # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.txt', 'w').write(flow_graph.dot())
def gen_bloc_data_flow_graph(ir_arch, in_str, ad): # arch, attrib, pool_bin, bloc, symbol_pool): out_str = "" # ir_arch = ir_x86_32(symbol_pool) for irbloc in ir_arch.blocs.values(): print irbloc ir_arch.gen_graph() ir_arch.dead_simp() irbloc_0 = None for irbloc in ir_arch.blocs.values(): if irbloc.label.offset == ad: irbloc_0 = irbloc break assert(irbloc_0 is not None) flow_graph = DiGraph() flow_graph.node2str = lambda n: node2str(flow_graph, n) done = set() todo = set([irbloc_0.label]) bloc2w = {} for irbloc in ir_arch.blocs.values(): intra_bloc_flow_raw(ir_arch, flow_graph, irbloc) # intra_bloc_flow_symb(ir_arch, flow_graph, irbloc) for irbloc in ir_arch.blocs.values(): print irbloc print 'IN', [str(x) for x in irbloc.in_nodes] print 'OUT', [str(x) for x in irbloc.out_nodes] print '*' * 20, 'interbloc', '*' * 20 inter_bloc_flow(ir_arch, flow_graph, irbloc_0.label) # sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core') # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.txt', 'w').write(flow_graph.dot())
def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb): for irblock in ircfg.blocks.values(): print irblock dead_simp(ir_arch, ircfg) irblock_0 = None for irblock in ircfg.blocks.values(): loc_key = irblock.loc_key offset = ircfg.loc_db.get_location_offset(loc_key) if offset == ad: irblock_0 = irblock break assert(irblock_0 is not None) flow_graph = DiGraph() flow_graph.node2str = lambda n: node2str(flow_graph, n) irb_in_nodes = {} irb_out_nodes = {} for label in ircfg.blocks: irb_in_nodes[label] = {} irb_out_nodes[label] = {} for label, irblock in ircfg.blocks.iteritems(): block_flow_cb(ir_arch, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) for label in ircfg.blocks: print label print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.dot', 'w').write(flow_graph.dot())
def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): for irblock in ir_arch.blocks.values(): print irblock dead_simp(ir_arch) irblock_0 = None for irblock in ir_arch.blocks.values(): loc_key = irblock.loc_key offset = ir_arch.loc_db.get_location_offset(loc_key) if offset == ad: irblock_0 = irblock break assert (irblock_0 is not None) flow_graph = DiGraph() flow_graph.node2str = lambda n: node2str(flow_graph, n) irb_in_nodes = {} irb_out_nodes = {} for label in ir_arch.blocks: irb_in_nodes[label] = {} irb_out_nodes[label] = {} for label, irblock in ir_arch.blocks.iteritems(): block_flow_cb(ir_arch, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label]) for label in ir_arch.blocks: print label print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 inter_block_flow(ir_arch, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.dot', 'w').write(flow_graph.dot())
def unflatGraph(flat_graph):
    """Rebuild a DiGraph from its flat representation
    @flat_graph: couple (nodes, edges), edges being couples of nodes
    """
    rebuilt = DiGraph()
    flat_nodes, flat_edges = flat_graph

    for flat_node in flat_nodes:
        rebuilt.add_node(flat_node)

    for flat_edge in flat_edges:
        src, dst = flat_edge
        rebuilt.add_edge(src, dst)

    return rebuilt
def blist2graph(ab):
    """
    Build the graph of a list of asmbloc; nodes are bloc labels
    @ab: list of asmbloc
    Return the DiGraph; its lbl2bloc attribute maps each label to its bloc
    """
    graph = DiGraph()
    label_to_bloc = {}
    graph.lbl2bloc = label_to_bloc

    for bloc in ab:
        label_to_bloc[bloc.label] = bloc
        graph.add_node(bloc.label)
        # One edge per destination constraint
        for constraint in bloc.bto:
            graph.add_edge(bloc.label, constraint.label)

    return graph
def gen_graph(self, link_all=True):
    """
    Build the irbloc DiGraph and store it in self.g
    @link_all: also gen edges to non present irblocs
    """
    flow = DiGraph()
    self.g = flow
    for src_lbl, irb in self.blocs.items():
        flow.add_node(src_lbl)
        for target in self.dst_trackback(irb):
            # Integer destinations are first turned into a label id
            if isinstance(target, ExprInt):
                offset = int(target.arg)
                target = ExprId(self.symbol_pool.getby_offset_create(offset))
            if not self.ExprIsLabel(target):
                continue
            if target.name in self.blocs or link_all is True:
                flow.add_edge(src_lbl, target.name)
def __init__(self, abicls, machine):
    """Initialise an empty state
    @abicls: kept as-is in self.abicls
    @machine: argument given to Machine() to obtain the IR analyser
    """
    self.abicls = abicls

    # Registers
    self.input_reg, self.output_reg = {}, {}

    # Execution tracking
    self._previous_addr = self._current_addr = 0
    self._instr_count = 0
    self._pending_call = []
    # Function addr -> list of information on calls
    self.function_calls = {}
    self.paths = DiGraph()

    # Memory
    self.in_memory, self.out_memory = {}, {}

    # Architecture dependent information
    # NOTE(review): sizeof_pointer() presumably returns bits, hence the /8
    analyser = Machine(machine).ira()
    self._ira = analyser
    self._ptr_size = analyser.sizeof_pointer()/8
    self.sp = analyser.sp.name
def as_graph(self, starting_nodes):
    """Merge the dependency graphs of every node of @starting_nodes into a
    single DiGraph, and return it
    @starting_nodes: set of DependencyNode instance
    """
    # One dependency subgraph per starting node, all built before merging
    per_node_graphs = [self._build_depgraph(start)
                       for start in starting_nodes]

    merged = DiGraph()
    for partial in per_node_graphs:
        for node in partial.nodes():
            merged.add_node(node)
        for link in partial.edges():
            merged.add_uniq_edge(*link)
    return merged
class ira: def sort_dst(self, todo, done): out = set() while todo: dst = todo.pop() if self.ExprIsLabel(dst): done.add(dst) elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt): done.add(dst) elif isinstance(dst, ExprCond): todo.add(dst.src1) todo.add(dst.src2) elif isinstance(dst, ExprId): out.add(dst) else: done.add(dst) return out def dst_trackback(self, b): dst = b.dst todo = set([dst]) out = set() done = set() for irs in reversed(b.irs): if len(todo) == 0: break out = self.sort_dst(todo, done) found = set() follow = set() for i in irs: if not out: break for o in out: if i.dst == o: follow.add(i.src) found.add(o) for o in found: out.remove(o) for o in out: if not o in found: follow.add(o) todo = follow out = self.sort_dst(todo, done) return done def gen_graph(self, link_all = False): """ Gen irbloc digraph @link_all: also gen edges to non present irblocs """ self.g = DiGraph() for lbl, b in self.blocs.items(): # print 'add', lbl self.g.add_node(lbl) # dst = self.get_bloc_dst(b) dst = self.dst_trackback(b) # print "\tdst", dst for d in dst: if isinstance(d, ExprInt): d = ExprId( self.symbol_pool.getby_offset_create(int(d.arg))) if self.ExprIsLabel(d): if d.name in self.blocs or link_all is True: self.g.add_edge(lbl, d.name) def graph(self): out = """ digraph asm_graph { size="80,50"; node [ fontsize = "16", shape = "box" ]; """ all_lbls = {} for lbl in self.g.nodes(): if not lbl in self.blocs: continue b = self.blocs[lbl] ir_txt = [str(lbl)] for irs in b.irs: for l in irs: ir_txt.append(str(l)) ir_txt.append("") ir_txt.append("") all_lbls[id(lbl)] = "\l\\\n".join(ir_txt) for l, v in all_lbls.items(): out += '%s [label="%s"];\n' % (l, v) for a, b in self.g.edges(): out += '%s -> %s;\n' % (id(a), id(b)) out += '}' return out def remove_dead(self, b): for ir, _, c_out in zip(b.irs, b.c_in, b.c_out): j = 0 while j < len(ir): i_cur = ir[j] if not isinstance(i_cur.dst, ExprId): pass elif (isinstance(i_cur.src, ExprOp) and i_cur.src.op.startswith('call')): # /!\ 
never remove ir calls pass elif not i_cur.dst in c_out: del(ir[j]) continue j += 1 def remove_blocs_dead(self): for b in self.blocs.values(): self.remove_dead(b) # for test XXX TODO def set_dead_regs(self, b): pass def add_unused_regs(self): pass def compute_in_out(self, b): # get out/in from bloc sons modified = False # set b in if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))): modified = True b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))) # set b out c_out = set() has_son = False for n_son in self.g.successors(b.label): # print n_me, n_son has_son = True if not n_son in self.blocs: print "leaf has lost her sons!" continue b_son = self.blocs[n_son] c_out.update(b_son.c_in[0]) if not has_son: # special case: leaf nodes architecture dependant c_out = self.get_out_regs(b) if b.c_out[-1] != set(c_out): modified = True b.c_out[-1] = set(c_out) # get out/in for bloc for i in reversed(xrange(len(b.irs))): if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))): modified = True b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i]))) if b.c_out[i] != set(b.c_in[i + 1]): modified = True b.c_out[i] = set(b.c_in[i + 1]) return modified def test_in_out_fix(self): fixed = True for n in self.g.nodes(): if not n in self.blocs: # leaf has lost her son continue b = self.blocs[n] if b.c_in != b.l_in or b.c_out != b.l_out: fixed = False b.l_in = [set(x) for x in b.c_in] b.l_out = [set(x) for x in b.c_out] return fixed def compute_dead(self): self.get_rw() it = 0 fixed_point = False print 'iteration...', while not fixed_point: print it, it += 1 for n in self.g.nodes(): if not n in self.blocs: # leaf has lost her son continue b = self.blocs[n] self.compute_in_out(b) fixed_point = self.test_in_out_fix() print def dead_simp(self): self.compute_dead() self.remove_blocs_dead() self.simplify_blocs() def gen_equations(self): for irb in self.blocs.values(): symbols_init = {} for r in self.arch.regs.all_regs_ids: x = ExprId(r.name, r.size) 
x.is_term = True symbols_init[r] = x sb = symbexec(self, dict(symbols_init)) sb.emulbloc(irb) eqs = [] for n_w in sb.symbols: v = sb.symbols[n_w] if n_w in symbols_init and symbols_init[n_w] == v: continue eqs.append(ExprAff(n_w, v)) print '*' * 40 print irb for eq in eqs: eq irb.irs = [eqs] irb.lines = [None] def sizeof_char(self): "Return the size of a char in bits" raise NotImplementedError("Abstract method") def sizeof_short(self): "Return the size of a short in bits" raise NotImplementedError("Abstract method") def sizeof_int(self): "Return the size of an int in bits" raise NotImplementedError("Abstract method") def sizeof_long(self): "Return the size of a long in bits" raise NotImplementedError("Abstract method") def sizeof_pointer(self): "Return the size of a void* in bits" raise NotImplementedError("Abstract method")
class ira: def sort_dst(self, todo, done): out = set() while todo: dst = todo.pop() if self.ExprIsLabel(dst): done.add(dst) elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt): done.add(dst) elif isinstance(dst, ExprCond): todo.add(dst.src1) todo.add(dst.src2) elif isinstance(dst, ExprId): out.add(dst) else: done.add(dst) return out def dst_trackback(self, b): dst = b.dst todo = set([dst]) out = set() done = set() for irs in reversed(b.irs): if len(todo) == 0: break out = self.sort_dst(todo, done) found = set() follow = set() for i in irs: if not out: break for o in out: if i.dst == o: follow.add(i.src) found.add(o) for o in found: out.remove(o) for o in out: if o not in found: follow.add(o) todo = follow out = self.sort_dst(todo, done) return done def gen_graph(self, link_all=True): """ Gen irbloc digraph @link_all: also gen edges to non present irblocs """ self.g = DiGraph() for lbl, b in self.blocs.items(): # print 'add', lbl self.g.add_node(lbl) # dst = self.get_bloc_dst(b) dst = self.dst_trackback(b) # print "\tdst", dst for d in dst: if isinstance(d, ExprInt): d = ExprId(self.symbol_pool.getby_offset_create(int( d.arg))) if self.ExprIsLabel(d): if d.name in self.blocs or link_all is True: self.g.add_edge(lbl, d.name) def graph(self): """Output the graphviz script""" out = """ digraph asm_graph { size="80,50"; node [ fontsize = "16", shape = "box" ]; """ all_lbls = {} for lbl in self.g.nodes(): if lbl not in self.blocs: continue irb = self.blocs[lbl] ir_txt = [str(lbl)] for irs in irb.irs: for l in irs: ir_txt.append(str(l)) ir_txt.append("") ir_txt.append("") all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt) for l, v in all_lbls.items(): # print l, v out += '%s [label="%s"];\n' % (l, v) for a, b in self.g.edges(): # print 'edge', a, b, hash(a), hash(b) out += '%s -> %s;\n' % (hash(a), hash(b)) out += '}' return out def remove_dead(self, irb): """Remove dead affectations using previous liveness analysis @irb: irbloc instance Return True iff the bloc state has 
changed PRE: compute_in_out(@irb) """ # print 'state1' # self.dump_bloc_state(irb) modified = False for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out): j = 0 while j < len(ir): i_cur = ir[j] if not isinstance(i_cur.dst, ExprId): pass elif i_cur.dst == self.IRDst: # never delete irdst pass elif (isinstance(i_cur.src, ExprOp) and i_cur.src.op.startswith('call')): # /!\ never remove ir calls pass elif i_cur.dst not in c_out: del (ir[j]) modified = True continue j += 1 # print 'state2' # self.dump_bloc_state(irb) return modified def remove_blocs_dead(self): """Call remove_dead on each irbloc Return True iff one of the bloc state has changed """ modified = False for b in self.blocs.values(): modified |= self.remove_dead(b) return modified # for test XXX TODO def set_dead_regs(self, b): pass def add_unused_regs(self): pass def dump_bloc_state(self, irb): print '*' * 80 for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in, irb.c_out)): print 'ir' for x in ir: print '\t', x print 'R', [str(x) for x in irb.r[i]] #c_in] print 'W', [str(x) for x in irb.w[i]] #c_out] print 'IN', [str(x) for x in c_in] print 'OUT', [str(x) for x in c_out] def compute_in_out(self, irb): """Liveness computation for a single bloc @irb: irbloc instance Return True iff bloc state has changed """ modified = False # Compute OUT for last irb entry c_out = set() has_son = False for n_son in self.g.successors(irb.label): has_son = True if n_son not in self.blocs: # If the son is not defined, we will propagate our current out # nodes to the in nodes's son son_c_in = irb.c_out_missing else: son_c_in = self.blocs[n_son].c_in[0] c_out.update(son_c_in) if not has_son: # Special case: leaf nodes architecture dependant c_out = self.get_out_regs(irb) if irb.c_out[-1] != c_out: irb.c_out[-1] = c_out modified = True # Compute out/in intra bloc for i in reversed(xrange(len(irb.irs))): new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i]))) if irb.c_in[i] != new_in: irb.c_in[i] = new_in modified = 
True if i >= len(irb.irs) - 1: # Last out has been previously updated continue new_out = set(irb.c_in[i + 1]) if irb.c_out[i] != new_out: irb.c_out[i] = new_out modified = True return modified def test_in_out_fix(self): """Return True iff a fixed point has been reached during liveness analysis""" fixed = True for node in self.g.nodes(): if node not in self.blocs: # leaf has lost her son continue irb = self.blocs[node] if irb.c_in != irb.l_in or irb.c_out != irb.l_out: fixed = False irb.l_in = [set(x) for x in irb.c_in] irb.l_out = [set(x) for x in irb.c_out] return fixed def fill_missing_son_c_in(self): """Find nodes with missing sons in graph, and add virtual link to all written variables of all parents. PRE: gen_graph() and get_rw()""" for node in self.g.nodes(): if node not in self.blocs: continue self.blocs[node].c_out_missing = set() has_all_son = True for node_son in self.g.successors(node): if node_son not in self.blocs: has_all_son = False break if has_all_son: continue parents = self.g.reachable_parents(node) for parent in parents: irb = self.blocs[parent] for var_w in irb.w: self.blocs[node].c_out_missing.update(var_w) def compute_dead(self): """Iterate liveness analysis until a fixed point is reached. PRE: gen_graph() """ it = 0 fixed_point = False log.debug('iteration...') while not fixed_point: log.debug(it) it += 1 for n in self.g.nodes(): if n not in self.blocs: # leaf has lost her son continue irb = self.blocs[n] self.compute_in_out(irb) fixed_point = self.test_in_out_fix() def dead_simp(self): """This function is used to analyse relation of a * complete function * This mean the blocs under study represent a solid full function graph. 
Ref: CS 5470 Compiler Techniques and Principles (Liveness analysis/Dataflow equations) PRE: call to gen_graph """ modified = True while modified: log.debug('dead_simp step') # Update r/w variables for all irblocs self.get_rw() # Fill c_in for missing sons self.fill_missing_son_c_in() # Liveness step self.compute_dead() modified = self.remove_blocs_dead() # Simplify expressions self.simplify_blocs() def gen_equations(self): for irb in self.blocs.values(): symbols_init = {} for r in self.arch.regs.all_regs_ids: x = ExprId(r.name, r.size) x.is_term = True symbols_init[r] = x sb = symbexec(self, dict(symbols_init)) sb.emulbloc(irb) eqs = [] for n_w in sb.symbols: v = sb.symbols[n_w] if n_w in symbols_init and symbols_init[n_w] == v: continue eqs.append(ExprAff(n_w, v)) print '*' * 40 print irb irb.irs = [eqs] irb.lines = [None] def sizeof_char(self): "Return the size of a char in bits" raise NotImplementedError("Abstract method") def sizeof_short(self): "Return the size of a short in bits" raise NotImplementedError("Abstract method") def sizeof_int(self): "Return the size of an int in bits" raise NotImplementedError("Abstract method") def sizeof_long(self): "Return the size of a long in bits" raise NotImplementedError("Abstract method") def sizeof_pointer(self): "Return the size of a void* in bits" raise NotImplementedError("Abstract method")
class Snapshot(object):
    """Record of an execution: registers, memory accesses, executed paths
    and function calls"""

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value >> (8 * byte)) & 0xFF)

    @classmethod
    def unpack_ptr(cls, value):
        '''Return @value unpacked with the native pointer format'''
        return struct.unpack('@P', value)[0]

    def __init__(self, abicls, machine):
        """Initialise an empty snapshot
        @abicls: kept as-is in self.abicls
        @machine: argument given to Machine() to obtain the IR analyser
        """
        self.abicls = abicls
        self.input_reg = {}
        self.output_reg = {}
        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        self.paths = DiGraph()
        self.in_memory = {}
        self.out_memory = {}
        self._ira = Machine(machine).ira()
        # NOTE(review): sizeof_pointer() presumably returns bits, hence / 8
        self._ptr_size = self._ira.sizeof_pointer() / 8
        self.sp = self._ira.sp.name

    def add_input_register(self, reg_name, reg_value):
        """Record @reg_value as the input value of register @reg_name"""
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        """Record @reg_value as the output value of register @reg_name"""
        self.output_reg[reg_name] = reg_value

    def add_memory_read(self, address, size, value):
        """Record, byte per byte, the read of @size bytes at @address
        @value: integer read; byte i is obtained with get_byte(value, i)
        """
        for i in xrange(size):
            cur_addr = address + i
            self.out_memory[cur_addr] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
            )

            if cur_addr not in self.in_memory:
                self.in_memory[cur_addr] = MemoryAccess(
                    1,
                    Snapshot.get_byte(value, i),
                    PAGE_READ,
                )
            else:
                self.in_memory[cur_addr].access |= PAGE_READ

    def add_memory_write(self, address, size, value):
        """Record, byte per byte, the write of @size bytes at @address
        @value: integer written; byte i is obtained with get_byte(value, i)
        """
        for i in xrange(size):
            cur_addr = address + i
            self.out_memory[cur_addr] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
            )

            if cur_addr not in self.in_memory:
                self.in_memory[cur_addr] = MemoryAccess(
                    1,
                    "\x00",  # The value is not used by the test
                    PAGE_WRITE,
                )
            else:
                self.in_memory[cur_addr].access |= PAGE_WRITE

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed
        instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
        self._instr_count += 1

        # Resolve call destination
        if (self._pending_call and
                self._previous_addr == self._pending_call[-1]["caller_addr"]):
            info = self._pending_call[-1]
            info["dest"] = address
            info["beg"] = self._instr_count

    def add_call(self, caller_addr, stack_ptr):
        '''
        Function call, target is not determined yet
        called *before* instruction execution
        '''
        info = {
            "stack_ptr": stack_ptr,
            "caller_addr": caller_addr,
        }
        self._pending_call.append(info)

    def add_ret(self, ret_addr, stack_ptr, value):
        '''
        Function ret
        called *after* instruction execution
        '''
        # Find corresponding call
        assert self._pending_call
        assert self._pending_call[-1]["stack_ptr"] >= stack_ptr

        info = self._pending_call.pop()
        info["end"] = self._instr_count
        info["ret"] = value
        # Calls are grouped by destination address
        self.function_calls.setdefault(info["dest"], list()).append(info)

    def clean(self):
        """Clean the snapshot for further uses"""
        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of non-overlapping block
        this function agglomerate contiguous blocks having the same access
        right
        '''
        # sorted() works on a copy of the keys: @mem can be modified
        for addr in sorted(mem):
            # if the addr is not already deleted
            if addr not in mem:
                continue
            end_addr = addr + mem[addr].size
            while end_addr in mem:
                cur_mem = mem[addr]
                next_mem = mem[end_addr]

                # If access change, do not agglomerate
                if cur_mem.access != next_mem.access:
                    break

                cur_mem.size += next_mem.size
                cur_mem.data += next_mem.data
                del mem[end_addr]
                end_addr += next_mem.size
def _build_depGraph(self, depnode):
    """Recursively build the DiGraph of the dependencies of @depnode, as
    recorded in self._cache
    @depnode: starting node
    Return a DiGraph containing @depnode, an edge from each of its
    dependencies to it, and the graph of each dependency
    """
    if depnode not in self._cache or \
            not self._cache[depnode]:
        # There is no dependency
        graph = DiGraph()
        graph.add_node(depnode)
        return graph

    # Recursion
    dependencies = list(self._cache[depnode])
    # head(graphs[i]) == dependencies[i]
    graphs = [self._build_depGraph(sub_depnode)
              for sub_depnode in dependencies]

    graph = DiGraph()
    graph.add_node(depnode)
    for head in dependencies:
        graph.add_uniq_edge(head, depnode)

    # Merge the graph of each dependency (itertools.product over a single
    # iterable was only wrapping each graph in a 1-tuple)
    for sourcegraph in graphs:
        for node in sourcegraph.nodes():
            graph.add_node(node)
        for edge in sourcegraph.edges():
            graph.add_uniq_edge(*edge)

    return graph
class ira: def sort_dst(self, todo, done): out = set() while todo: dst = todo.pop() if self.ExprIsLabel(dst): done.add(dst) elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt): done.add(dst) elif isinstance(dst, ExprCond): todo.add(dst.src1) todo.add(dst.src2) elif isinstance(dst, ExprId): out.add(dst) else: done.add(dst) return out def dst_trackback(self, b): dst = b.dst todo = set([dst]) out = set() done = set() for irs in reversed(b.irs): if len(todo) == 0: break out = self.sort_dst(todo, done) found = set() follow = set() for i in irs: if not out: break for o in out: if i.dst == o: follow.add(i.src) found.add(o) for o in found: out.remove(o) for o in out: if not o in found: follow.add(o) todo = follow out = self.sort_dst(todo, done) return done def gen_graph(self, link_all=False): """ Gen irbloc digraph @link_all: also gen edges to non present irblocs """ self.g = DiGraph() for lbl, b in self.blocs.items(): # print 'add', lbl self.g.add_node(lbl) # dst = self.get_bloc_dst(b) dst = self.dst_trackback(b) # print "\tdst", dst for d in dst: if isinstance(d, ExprInt): d = ExprId(self.symbol_pool.getby_offset_create(int( d.arg))) if self.ExprIsLabel(d): if d.name in self.blocs or link_all is True: self.g.add_edge(lbl, d.name) def graph(self): out = """ digraph asm_graph { size="80,50"; node [ fontsize = "16", shape = "box" ]; """ all_lbls = {} for lbl in self.g.nodes(): if not lbl in self.blocs: continue b = self.blocs[lbl] ir_txt = [str(lbl)] for irs in b.irs: for l in irs: ir_txt.append(str(l)) ir_txt.append("") ir_txt.append("") all_lbls[id(lbl)] = "\l\\\n".join(ir_txt) for l, v in all_lbls.items(): out += '%s [label="%s"];\n' % (l, v) for a, b in self.g.edges(): out += '%s -> %s;\n' % (id(a), id(b)) out += '}' return out def remove_dead(self, b): for ir, _, c_out in zip(b.irs, b.c_in, b.c_out): j = 0 while j < len(ir): i_cur = ir[j] if not isinstance(i_cur.dst, ExprId): pass elif (isinstance(i_cur.src, ExprOp) and i_cur.src.op.startswith('call')): # /!\ 
never remove ir calls pass elif not i_cur.dst in c_out: del (ir[j]) continue j += 1 def remove_blocs_dead(self): for b in self.blocs.values(): self.remove_dead(b) # for test XXX TODO def set_dead_regs(self, b): pass def add_unused_regs(self): pass def compute_in_out(self, b): # get out/in from bloc sons modified = False # set b in if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))): modified = True b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))) # set b out c_out = set() has_son = False for n_son in self.g.successors(b.label): # print n_me, n_son has_son = True if not n_son in self.blocs: print "leaf has lost her sons!" continue b_son = self.blocs[n_son] c_out.update(b_son.c_in[0]) if not has_son: # special case: leaf nodes architecture dependant c_out = self.get_out_regs(b) if b.c_out[-1] != set(c_out): modified = True b.c_out[-1] = set(c_out) # get out/in for bloc for i in reversed(xrange(len(b.irs))): if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))): modified = True b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i]))) if b.c_out[i] != set(b.c_in[i + 1]): modified = True b.c_out[i] = set(b.c_in[i + 1]) return modified def test_in_out_fix(self): fixed = True for n in self.g.nodes(): if not n in self.blocs: # leaf has lost her son continue b = self.blocs[n] if b.c_in != b.l_in or b.c_out != b.l_out: fixed = False b.l_in = [set(x) for x in b.c_in] b.l_out = [set(x) for x in b.c_out] return fixed def compute_dead(self): self.get_rw() it = 0 fixed_point = False print 'iteration...', while not fixed_point: print it, it += 1 for n in self.g.nodes(): if not n in self.blocs: # leaf has lost her son continue b = self.blocs[n] self.compute_in_out(b) fixed_point = self.test_in_out_fix() print def dead_simp(self): self.compute_dead() self.remove_blocs_dead() self.simplify_blocs() def gen_equations(self): for irb in self.blocs.values(): symbols_init = {} for r in self.arch.regs.all_regs_ids: x = ExprId(r.name, r.size) 
x.is_term = True symbols_init[r] = x sb = symbexec(self, dict(symbols_init)) sb.emulbloc(irb) eqs = [] for n_w in sb.symbols: v = sb.symbols[n_w] if n_w in symbols_init and symbols_init[n_w] == v: continue eqs.append(ExprAff(n_w, v)) print '*' * 40 print irb for eq in eqs: eq irb.irs = [eqs] irb.lines = [None] def sizeof_char(self): "Return the size of a char in bits" raise NotImplementedError("Abstract method") def sizeof_short(self): "Return the size of a short in bits" raise NotImplementedError("Abstract method") def sizeof_int(self): "Return the size of an int in bits" raise NotImplementedError("Abstract method") def sizeof_long(self): "Return the size of a long in bits" raise NotImplementedError("Abstract method") def sizeof_pointer(self): "Return the size of a void* in bits" raise NotImplementedError("Abstract method")
def __init__(self, ab=None):
    """Create the container and register every bloc of @ab
    @ab: (optional) iterable of blocs; no bloc is added when omitted
    """
    # None replaces the former shared mutable default ([])
    self.blocs = {}
    self.g = DiGraph()
    self.add_blocs([] if ab is None else ab)
class ira:
    """IR analysis helpers: block graph generation, reach/kill/defout
    analysis and dead code removal based on it.

    The methods rely on attributes and methods that are not defined in this
    class body (self.blocs, self.arch, self.IRDst, self.symbol_pool,
    self.ExprIsLabel, self.get_out_regs, self.get_rw, self.simplify_blocs).
    """

    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        """Drain @todo and classify each expression it contains
        @todo: set of expressions to classify (emptied by this call)
        @done: set receiving labels, ExprMem, ExprInt and any expression
               that is neither an ExprCond nor an ExprId
        Both sources of an ExprCond are pushed back into @todo.
        Return the set of ExprId found
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of @b backward through its IR lists
        @b: bloc exposing .dst and .irs
        The IR lists are walked from the last one to the first one; each
        pending ExprId is replaced by the source assigned to it when such an
        assignment is found.
        Return the set of destinations collected by sort_dst()
        """
        dst = b.dst
        todo = set([dst])
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            # Ids with no assignment in this IR list are searched again in
            # the previous one
            for o in out:
                if o not in found:
                    follow.add(o)
            todo = follow

        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    # Integer destination: turn it into a label expression
                    d = ExprId(self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        out = """ digraph asm_graph { size="80,50"; node [ fontsize = "16", shape = "box" ]; """
        all_lbls = {}
        for lbl in self.g.nodes():
            # Nodes without a known bloc get no label entry
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            # Graph nodes are named after the hash of their label
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += "%s -> %s;\n" % (hash(a), hash(b))
        out += "}"
        return out

    def remove_dead_instr(self, irb, useful):
        """Remove dead affectations using previous reaches analysis
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            j = 0
            while j < len(ir):
                cur_instr = ir[j]
                # Only assignments to an ExprId can be removed
                if isinstance(cur_instr.dst, ExprId) and (irb.label, k, cur_instr) not in useful:
                    # The index is not advanced: the next instruction has
                    # moved to position j
                    del ir[j]
                    modified = True
                else:
                    j += 1
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of initial useful instructions
        """

        useful = set()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.g.successors(node)
            has_son = bool(successors)
            for p_son in successors:
                if p_son not in self.blocs:
                    # Leaf has lost its son: don't remove anything
                    # reaching this block
                    for r in self.ira_regs_ids():
                        useful.update(block.cur_reach[-1][r].union(
                            block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.src.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r]
                                  if block.defout[-1][r] else
                                  block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division,  Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)
        """

        useful = self.init_useful_instr()
        worklist = useful.copy()
        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
                worklist.update(
                    cur_reach[reg].difference(useful).difference(
                        cur_kill[reg]
                        if not instr_defout[reg] else
                        set()))
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the reach
        analysis .
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            modified |= self.remove_dead_instr(block, useful)
        return modified

    def set_dead_regs(self, b):
        """Placeholder: does nothing in this class"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing in this class"""
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print " (%s, %s, %s)" % p

    def dump_bloc_state(self, irb):
        """Print the non-empty REACH, KILL and DEFOUT sets of @irb
        @irb: irbloc instance
        The sets of IR list k are printed once per instruction of that list.
        """
        print "*" * 80
        for k, irs in enumerate(irb.irs):
            for i in xrange(len(irs)):
                print 5 * "-"
                print "instr", k, irs[i]
                print 5 * "-"
                for v in self.ira_regs_ids():
                    if irb.cur_reach[k][v]:
                        print "REACH[%d][%s]" % (k, v)
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print "KILL[%d][%s]" % (k, v)
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print "DEFOUT[%d][%s]" % (k, v)
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block
        @irb: irbloc instance
        PRE: init_reach()
        """

        # Work on a copy so that the comparison below detects a change
        reach_block = {key: value.copy()
                       for key, value in irb.cur_reach[0].iteritems()}

        # Compute reach from predecessors
        for n_pred in self.g.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(
                        p_block.cur_kill[-1][c_reg]))
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in self.ira_regs_ids():
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(
                        reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        for i in xrange(1, len(irb.irs)):
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(
                        irb.cur_kill[i - 1][c_reg]))
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(
                        irb.cur_reach[i][c_reg].difference(
                            irb.defout[i][c_reg]))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""

        fixed = True
        for node in self.g.nodes():
            if node in self.blocs:
                irb = self.blocs[node]
                if irb.cur_reach != irb.prev_reach or irb.cur_kill != irb.prev_kill:
                    fixed = False
                    # NOTE(review): these are shallow list copies, the inner
                    # dicts and sets stay shared with cur_reach / cur_kill
                    irb.prev_reach = irb.cur_reach[:]
                    irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        fixed_point = False
        log.debug("iteration...")
        while not fixed_point:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            fixed_point = self._test_kill_reach_fix()

    def dead_simp(self):
        """
        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Liveness step
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each bloc by the equations obtained from
        its symbolic execution.

        Each bloc is executed from a state where every register of
        self.arch.regs.all_regs_ids maps to a fresh ExprId flagged is_term;
        symbols still equal to their initial value are skipped.
        The bloc is printed, then its irs / lines attributes are overwritten.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                # Unchanged symbol: no equation needed
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print "*" * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
def as_graph(self):
    """Return a DiGraph built from self.links, one edge per link"""
    dependency_graph = DiGraph()
    for target, source in self.links:
        # A link (a, b) is stored in the graph as the edge b -> a
        dependency_graph.add_edge(source, target)
    return dependency_graph
class Snapshot(object):
    """Record of a traced execution: input/output registers, byte-level
    memory accesses, executed paths and function calls."""

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))

    @classmethod
    def unpack_ptr(cls, value):
        '''Return the integer stored in @value, a buffer packed with the
        native pointer format ('@P')'''
        return struct.unpack('@P', value)[0]

    def __init__(self, abicls, machine):
        '''
        @abicls: ABI class, stored as is
        @machine: machine name given to Machine() to get the IR analyser
        '''
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        # Calls whose matching ret has not been seen yet (innermost last)
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        # Pointer size in bytes; explicit integer division keeps it usable
        # as a length whatever the division semantics in effect
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name

    def add_input_register(self, reg_name, reg_value):
        '''Record @reg_value as the input value of register @reg_name'''
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        '''Record @reg_value as the output value of register @reg_name'''
        self.output_reg[reg_name] = reg_value

    def _record_access(self, address, size, value, access, in_data=None):
        '''Record a @size bytes access at @address, one entry per byte
        @value: integer value read or written
        @access: access right added to the input memory entry
        @in_data: data stored in a new input memory entry; when None, the
                  corresponding byte of @value is used
        '''
        for i in xrange(size):
            cur_addr = address + i
            cur_byte = Snapshot.get_byte(value, i)
            self.out_memory[cur_addr] = MemoryAccess(
                1,
                cur_byte,
                0,  # Output access never used
            )

            if cur_addr not in self.in_memory:
                self.in_memory[cur_addr] = MemoryAccess(
                    1,
                    cur_byte if in_data is None else in_data,
                    access,
                )
            else:
                self.in_memory[cur_addr].access |= access

    def add_memory_read(self, address, size, value):
        '''Record the read of @value (@size bytes) at @address'''
        self._record_access(address, size, value, PAGE_READ)

    def add_memory_write(self, address, size, value):
        '''Record the write of @value (@size bytes) at @address'''
        # The input value of a written-only byte is not used by the test
        self._record_access(address, size, value, PAGE_WRITE, "\x00")

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
        self._instr_count += 1

        # Resolve call destination
        if (self._pending_call and
                self._previous_addr == self._pending_call[-1]["caller_addr"]):
            info = self._pending_call[-1]
            info["dest"] = address
            info["beg"] = self._instr_count

    def add_call(self, caller_addr, stack_ptr):
        '''
        Function call, target is not determined yet
        called *before* instruction execution
        '''
        info = {"stack_ptr": stack_ptr,
                "caller_addr": caller_addr,
                }
        self._pending_call.append(info)

    def add_ret(self, ret_addr, stack_ptr, value):
        '''
        Function ret
        called *after* instruction execution
        '''
        # Find corresponding call
        assert self._pending_call
        assert self._pending_call[-1]["stack_ptr"] >= stack_ptr

        info = self._pending_call.pop()
        info["end"] = self._instr_count
        info["ret"] = value
        # Calls are grouped by destination address
        self.function_calls.setdefault(info["dest"], []).append(info)

    def clean(self):
        """Clean the snapshot for further uses"""
        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of non-overlapping block
        this function agglomerate contiguous blocks having the same access right
        '''
        for addr in sorted(mem.keys()):

            # if the addr is not already deleted
            if addr in mem:

                end_addr = addr + mem[addr].size
                while end_addr in mem:
                    cur_mem = mem[addr]
                    next_mem = mem[end_addr]

                    # If access change, do not agglomerate
                    if cur_mem.access != next_mem.access:
                        break

                    cur_mem.size += next_mem.size
                    cur_mem.data += next_mem.data
                    del mem[end_addr]
                    end_addr += next_mem.size
class ira:
    """IR analysis helpers: block graph generation, liveness (in/out)
    analysis and dead affectation removal based on it.

    The methods rely on attributes and methods that are not defined in this
    class body (self.blocs, self.arch, self.IRDst, self.symbol_pool,
    self.ExprIsLabel, self.get_out_regs, self.get_rw, self.simplify_blocs).
    """

    def sort_dst(self, todo, done):
        """Drain @todo and classify each expression it contains
        @todo: set of expressions to classify (emptied by this call)
        @done: set receiving labels, ExprMem, ExprInt and any expression
               that is neither an ExprCond nor an ExprId
        Both sources of an ExprCond are pushed back into @todo.
        Return the set of ExprId found
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of @b backward through its IR lists
        @b: bloc exposing .dst and .irs
        The IR lists are walked from the last one to the first one; each
        pending ExprId is replaced by the source assigned to it when such an
        assignment is found.
        Return the set of destinations collected by sort_dst()
        """
        dst = b.dst
        todo = set([dst])
        out = set()
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            # Ids with no assignment in this IR list are searched again in
            # the previous one
            for o in out:
                if o not in found:
                    follow.add(o)
            todo = follow
        # Classify what remains once every IR list has been visited
        out = self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all = True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    # Integer destination: turn it into a label expression
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        out = """ digraph asm_graph { size="80,50"; node [ fontsize = "16", shape = "box" ]; """
        all_lbls = {}
        for lbl in self.g.nodes():
            # Nodes without a known bloc get no label entry
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            # Graph nodes are named after the hash of their label
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += '%s -> %s;\n' % (hash(a), hash(b))
        out += '}'
        return out

    def remove_dead(self, irb):
        """Remove dead affectations using previous liveness analysis
        @irb: irbloc instance
        Return True iff the bloc state has changed
        PRE: compute_in_out(@irb)
        """

        # print 'state1'
        # self.dump_bloc_state(irb)

        modified = False
        for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out):
            j = 0
            while j < len(ir):
                i_cur = ir[j]
                if not isinstance(i_cur.dst, ExprId):
                    # Only assignments to an ExprId can be removed
                    pass
                elif i_cur.dst == self.IRDst:
                    # never delete irdst
                    pass
                elif (isinstance(i_cur.src, ExprOp) and
                      i_cur.src.op.startswith('call')):
                    # /!\ never remove ir calls
                    pass
                elif i_cur.dst not in c_out:
                    # Destination absent from the out set of this IR list.
                    # The index is not advanced: the next instruction has
                    # moved to position j
                    del(ir[j])
                    modified = True
                    continue
                j += 1

        # print 'state2'
        # self.dump_bloc_state(irb)

        return modified

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc
        Return True iff one of the bloc state has changed
        """
        modified = False
        for b in self.blocs.values():
            modified |= self.remove_dead(b)
        return modified

    # for test XXX TODO
    def set_dead_regs(self, b):
        """Placeholder: does nothing in this class"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing in this class"""
        pass

    def dump_bloc_state(self, irb):
        """Print, for each IR list of @irb, its instructions followed by
        its R, W, IN and OUT sets
        @irb: irbloc instance
        """
        print '*'*80
        for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in, irb.c_out)):
            print 'ir'
            for x in ir:
                print '\t', x
            print 'R', [str(x) for x in irb.r[i]]#c_in]
            print 'W', [str(x) for x in irb.w[i]]#c_out]
            print 'IN', [str(x) for x in c_in]
            print 'OUT', [str(x) for x in c_out]

    def compute_in_out(self, irb):
        """Liveness computation for a single bloc
        @irb: irbloc instance
        Return True iff bloc state has changed
        """
        modified = False

        # Compute OUT for last irb entry
        c_out = set()
        has_son = False
        for n_son in self.g.successors(irb.label):
            has_son = True
            if n_son not in self.blocs:
                # If the son is not defined, we will propagate our current out
                # nodes to the in nodes's son
                son_c_in = irb.c_out_missing
            else:
                son_c_in = self.blocs[n_son].c_in[0]
            c_out.update(son_c_in)
        if not has_son:
            # Special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(irb)

        if irb.c_out[-1] != c_out:
            irb.c_out[-1] = c_out
            modified = True

        # Compute out/in intra bloc
        for i in reversed(xrange(len(irb.irs))):
            # IN = R U (OUT \ W)
            new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
            if irb.c_in[i] != new_in:
                irb.c_in[i] = new_in
                modified = True

            if i >= len(irb.irs) - 1:
                # Last out has been previously updated
                continue
            # OUT of an IR list is the IN of the next one
            new_out = set(irb.c_in[i + 1])
            if irb.c_out[i] != new_out:
                irb.c_out[i] = new_out
                modified = True

        return modified

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis"""

        fixed = True
        for node in self.g.nodes():
            if node not in self.blocs:
                # leaf has lost her son
                continue
            irb = self.blocs[node]
            if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
                fixed = False
            # Remember the current sets (copied) for the next comparison
            irb.l_in = [set(x) for x in irb.c_in]
            irb.l_out = [set(x) for x in irb.c_out]
        return fixed

    def fill_missing_son_c_in(self):
        """Find nodes with missing sons in graph, and add virtual link to all
        written variables of all parents.
        PRE: gen_graph() and get_rw()"""

        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            self.blocs[node].c_out_missing = set()
            has_all_son = True
            for node_son in self.g.successors(node):
                if node_son not in self.blocs:
                    has_all_son = False
                    break
            if has_all_son:
                continue
            # At least one son is unknown: gather the variables written by
            # every bloc returned by reachable_parents()
            parents = self.g.reachable_parents(node)
            for parent in parents:
                irb = self.blocs[parent]
                for var_w in irb.w:
                    self.blocs[node].c_out_missing.update(var_w)

    def compute_dead(self):
        """Iterate liveness analysis until a fixed point is reached.
        PRE: gen_graph()
        """

        it = 0
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            log.debug(it)
            it += 1
            for n in self.g.nodes():
                if n not in self.blocs:
                    # leaf has lost her son
                    continue
                irb = self.blocs[n]
                self.compute_in_out(irb)

            fixed_point = self.test_in_out_fix()

    def dead_simp(self):
        """This function is used to analyse relation of a * complete function *
        This mean the blocs under study represent a solid full function graph.

        Ref: CS 5470 Compiler Techniques and Principles (Liveness
        analysis/Dataflow equations)

        PRE: call to gen_graph
        """

        # Removing an affectation may make other ones dead: loop until
        # nothing is removed anymore
        modified = True
        while modified:
            log.debug('dead_simp step')

            # Update r/w variables for all irblocs
            self.get_rw()
            # Fill c_in for missing sons
            self.fill_missing_son_c_in()

            # Liveness step
            self.compute_dead()
            modified = self.remove_blocs_dead()

        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each bloc by the equations obtained from
        its symbolic execution.

        Each bloc is executed from a state where every register of
        self.arch.regs.all_regs_ids maps to a fresh ExprId flagged is_term;
        symbols still equal to their initial value are skipped.
        The bloc is printed, then its irs / lines attributes are overwritten.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                # Unchanged symbol: no equation needed
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
class Snapshot(object):
    """Record of a traced execution: input/output registers, byte-level
    memory accesses (tagged with their segment), executed paths and the
    references (pointers) found in registers and memory."""

    # Registers dropped from the recorded output by
    # remove_clobbered_registers()
    # NOTE(review): "RBP" is listed twice; harmless for membership tests
    clobbered_regs = [
        "RCX", "RDX", "RSI", "RDI", "RBP", "R8", "R9", "R10", "R11", "RBP"
    ]

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))

    @classmethod
    def unpack_ptr(cls, value):
        '''Return the integer stored in @value, a buffer packed with the
        native pointer format ('@P')'''
        return struct.unpack('@P', value)[0]

    def __init__(self, segments, abicls, machine):
        '''
        @segments: sequence of segments, indexed as (start, end, ...)
        @abicls: ABI class, stored as is
        @machine: machine name given to Machine() to get the IR analyser
        '''
        self.segments = segments
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self.refs = {}

        self._ira = Machine(machine).ira()
        # Pointer size in bytes; explicit integer division keeps it usable
        # as a length whatever the division semantics in effect
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name

    def _get_segment_index_by_addr(self, addr):
        '''Return the index of the segment containing @addr
        Raise ValueError if no segment contains it'''
        for i, seg in enumerate(self.segments):
            if seg[0] <= addr < seg[1]:
                return i
        raise ValueError("Segment not found for addr %x" % addr)

    def add_input_register(self, reg_name, reg_value):
        '''Record @reg_value as the input value of register @reg_name'''
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        '''Record @reg_value as the output value of register @reg_name'''
        self.output_reg[reg_name] = reg_value

    def add_memory_read(self, address, size, value):
        '''Record the read of @value (@size bytes) at @address, one entry
        per byte'''
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
                self._get_segment_index_by_addr(address + i))

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(
                    1,
                    Snapshot.get_byte(value, i),
                    PAGE_READ,
                    self._get_segment_index_by_addr(address + i))
            else:
                self.in_memory[address + i].access |= PAGE_READ

    def add_memory_write(self, address, size, value):
        '''Record the write of @value (@size bytes) at @address, one entry
        per byte'''
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
                self._get_segment_index_by_addr(address + i))

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(
                    1,
                    "\x00",  # The value is not used by the test
                    PAGE_WRITE,
                    self._get_segment_index_by_addr(address + i))
            else:
                self.in_memory[address + i].access |= PAGE_WRITE

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)

    def clean(self):
        '''Try to remove all implementation dependant elements from the trace'''

        # do not record stack frame on output because the use of this
        # memory area is implementation dependant
        self.remove_stack_frame()

        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

        self.remove_clobbered_registers()

        self.find_references_to_input_memory()

        self.addresses_to_segment_offset()

        self.remap_segment()

    def remove_stack_frame(self):
        '''
        Remove stack frame from the memory.
        Memory is considered fragmented ie. composed of one byte sized blocks
        '''
        SP = self.input_reg[self.sp]
        stack_seg_idx = self._get_segment_index_by_addr(SP)
        top_stack = self.segments[stack_seg_idx][0]

        for mem in (self.out_memory, self.in_memory):
            for addr in mem.keys():
                if top_stack < addr < SP + self._ptr_size:
                    # addr in stack frame
                    del mem[addr]

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of 1 byte sized bloc, this function
        agglomerate contiguous blocs that are in the same segment and have
        the same access right
        '''
        for addr in sorted(mem.keys()):

            # if the addr is not already deleted
            if addr in mem:

                end_addr = addr + mem[addr].size
                while end_addr in mem:
                    cur_mem = mem[addr]
                    next_mem = mem[end_addr]

                    # If access change, do not agglomerate
                    if cur_mem.access != next_mem.access:
                        break

                    # If segment change, do not agglomerate
                    if cur_mem.segment != next_mem.segment:
                        break

                    cur_mem.size += next_mem.size
                    cur_mem.data += next_mem.data
                    del mem[end_addr]
                    end_addr += next_mem.size

    def remove_clobbered_registers(self):
        '''Remove clobbered registers from the output registers'''
        self.output_reg = {
            reg: v
            for reg, v in self.output_reg.iteritems()
            if reg not in self.clobbered_regs
        }

    def find_references_to_input_memory(self):
        '''
        Populate the refs attribute
        This attribute should contain all the references (pointers)
        present in memory and registers
        '''

        # The heuristic used to detect is: if a memory bloc or a
        # register hold a value that is an address used in the input
        # memory, then this bloc or register is a reference

        # To determine the size of the referenced memory, the strategy
        # is a greedy one. All the memory that follow the referenced
        # address is considered as part of the reference.

        for reg, value in self.input_reg.iteritems():
            if reg != self.sp:
                self.update_references(reg, value, "in_reg")

        for reg, value in self.output_reg.iteritems():
            if reg != self.sp:
                self.update_references(reg, value, "out_reg")

        ptr_size = self._ptr_size
        # Every pointer-sized window of each memory bloc is a candidate
        for addr, mem in self.in_memory.iteritems():
            data = mem.data
            for i in xrange(len(data) - ptr_size + 1):
                self.update_references(addr + i,
                                       self.unpack_ptr(data[i:ptr_size + i]),
                                       "in_mem")

        for addr, mem in self.out_memory.iteritems():
            data = mem.data
            for i in xrange(len(data) - ptr_size + 1):
                self.update_references(addr + i,
                                       self.unpack_ptr(data[i:ptr_size + i]),
                                       "out_mem")

        self.add_rsp_ref_to_stack()

        # If two references are contiguous, then the first reference
        # size will cover the second one. The following code remove
        # these kind of overlaps
        for ref in sorted(self.refs):
            for ref2 in self.refs:
                if ref < ref2 < ref + self.refs[ref].size:
                    self.refs[ref].size -= self.refs[ref2].size

    def add_rsp_ref_to_stack(self):
        '''Register the input and output stack pointer values as references,
        creating a pointer-sized reference when none exists yet'''
        in_RSP = self.input_reg[self.sp]
        out_RSP = self.output_reg[self.sp]
        stack_seg_idx = self._get_segment_index_by_addr(in_RSP)

        # The register is named with self.sp, the name getStackSegment()
        # looks for, instead of a hard-coded "RSP"
        if in_RSP not in self.refs:
            self.refs[in_RSP] = Reference(stack_seg_idx, self._ptr_size)
        self.refs[in_RSP].add_ref(self.sp, "in_reg")

        if out_RSP not in self.refs:
            self.refs[out_RSP] = Reference(stack_seg_idx, self._ptr_size)
        self.refs[out_RSP].add_ref(self.sp, "out_reg")

    def update_references(self, name, value, value_type):
        '''If @value is an address of the input memory, record that @name
        references it
        @name: register name or memory address holding @value
        @value: candidate pointer value
        @value_type: kind of holder, forwarded to Reference.add_ref
        '''
        in_mem = self.in_memory
        for addr, mem in in_mem.iteritems():
            if addr <= value < addr + mem.size:
                if value not in self.refs:
                    # Greedy size: rest of the bloc plus every bloc that
                    # directly follows it
                    size = mem.size - (value - addr)
                    while addr + size in in_mem:
                        size += in_mem[addr + size].size
                    self.refs[value] = Reference(mem.segment, size)
                self.refs[value].add_ref(name, value_type)

    def addresses_to_segment_offset(self):
        '''Convert the absolute addresses to segment base/offset addresses'''
        self._addresses_to_segment_offset(self.in_memory)
        self._addresses_to_segment_offset(self.out_memory)
        self._addresses_to_segment_offset(self.refs)

        for ref in self.refs.itervalues():
            ref.addresses_to_segment_offset(self.segments)

    def _addresses_to_segment_offset(self, mem):
        '''Re-key @mem from absolute address to (offset in segment, segment
        index)'''
        for addr in mem.keys():
            seg_idx = mem[addr].segment
            segment_base = self.segments[seg_idx][0]
            mem[(addr - segment_base, seg_idx)] = mem.pop(addr)

    def isRegInInputRef(self, reg):
        '''Return the key of the first reference listing @reg in its in_reg,
        None if there is none'''
        for addr, ref in self.refs.iteritems():
            if reg in ref.in_reg:
                return addr
        return None

    def isRegInOutputRef(self, reg):
        '''Return the key of the first reference listing @reg in its out_reg,
        None if there is none'''
        for addr, ref in self.refs.iteritems():
            if reg in ref.out_reg:
                return addr
        return None

    def isMemInRef(self, mem):
        '''Return the key of the first reference listing @mem in its in_mem,
        None if there is none'''
        for addr, ref in self.refs.iteritems():
            if mem in ref.in_mem:
                return addr
        return None

    def _updateSegmentInDict(self, dic, seg_mapping):
        '''
        Change the segment indexes used by dic according to the new segment
        mapping seg_mapping
        @dic: dictionary keyed by (offset, segment); updated in place
        @seg_mapping: old segment index -> new segment index
        '''
        # The re-keyed entries are gathered in a separate dictionary: a new
        # key may be equal to the old key of an entry not processed yet, and
        # writing it directly into @dic would overwrite that entry
        remapped = {}
        for (offset, seg), entry in dic.items():
            new_seg_nb = seg_mapping[seg]
            entry.segment = new_seg_nb
            remapped[(offset, new_seg_nb)] = entry
        dic.clear()
        dic.update(remapped)

    def __update_mapping_struct(self, memory, seg_mapping, seg_borne):
        '''
        Add the segments used in mem to the segment mapping (seg_mapping)
        and update the bornes (seg_borne)
        Instance variable "__nb_seg" should be initialized to 0 before the
        first call to this function
        '''
        for (offset, seg), mem in memory.iteritems():
            if seg in seg_mapping:
                (minAddr, maxAddr) = seg_borne[seg]
                seg_borne[seg] = (min(minAddr, offset),
                                  max(maxAddr, offset + mem.size))
            else:
                seg_borne[seg] = (offset, offset + mem.size)
                seg_mapping[seg] = self.__nb_seg
                self.__nb_seg += 1
        return self.__nb_seg

    def remap_segment(self):
        '''
        Reduce the self.segment structure to be minimalist (only segments
        used by in and out memory)
        '''
        seg_mapping = {}
        seg_borne = {}

        # Get the new segment mapping and corresponding sizes
        self.__nb_seg = 0
        self.__update_mapping_struct(self.in_memory, seg_mapping, seg_borne)
        self.__update_mapping_struct(self.out_memory, seg_mapping, seg_borne)
        self.__update_mapping_struct(self.refs, seg_mapping, seg_borne)

        seg_size = [0] * len(seg_mapping)
        for no_seg, borne in seg_borne.iteritems():
            seg_size[seg_mapping[no_seg]] = borne[1] - borne[0]

        # Use the new mapping in snapshot's dictionaries
        self._updateSegmentInDict(self.in_memory, seg_mapping)
        self._updateSegmentInDict(self.out_memory, seg_mapping)
        self._updateSegmentInDict(self.refs, seg_mapping)

        # Use the new mapping inside reference structure
        for ref in self.refs.itervalues():
            ref.in_mem = [(offset, seg_mapping[seg])
                          for (offset, seg) in ref.in_mem]
            ref.out_mem = [(offset, seg_mapping[seg])
                           for (offset, seg) in ref.out_mem]

        # Update self.segment according to the new mapping
        new_segments = [None] * len(seg_mapping)
        for seg, mapping in seg_mapping.iteritems():
            new_segments[mapping] = self.segments[seg]
        self.segments = new_segments

    def removeRegFromRef(self, reg):
        '''Remove @reg from the in_reg of every reference listing it'''
        for ref in self.refs.itervalues():
            if reg in ref.in_reg:
                ref.in_reg.remove(reg)

    def removeMemFromRef(self, mem):
        '''Remove @mem from the in_mem of every reference listing it'''
        for ref in self.refs.itervalues():
            if mem in ref.in_mem:
                ref.in_mem.remove(mem)

    def getStackSegment(self):
        '''Return the key of the reference held by the input stack pointer
        (implicitly None when no reference lists it)'''
        for addr, ref in self.refs.iteritems():
            if self.sp in ref.in_reg:
                return addr

    def changeArg(self, number, newValue):
        '''Replace the value of the argument number @number by @newValue
        @number: 1-based argument index; indexes below 7 are looked up in
                 self.abicls.regs_mapping, the others on the stack
        @newValue: new value of the argument
        Return True if the argument has been changed, False if it is not
        present in the snapshot
        '''

        # If argument is passed in a register
        if number < 7:
            reg_list = self.abicls.regs_mapping
            self.removeRegFromRef(reg_list[number - 1])
            try:
                self.input_reg[reg_list[number - 1]] = newValue
                del self.output_reg[reg_list[number - 1]]
            except KeyError:
                # The register has no recorded output value: nothing to drop
                pass
            return True

        # If argument is passed on the stack
        else:
            (stackOff, stackSeg) = self.getStackSegment()
            argAddr = stackOff + 8 * (number - 7) + 8
            self.removeMemFromRef((argAddr, stackSeg))
            argFoundInSnapshot = False

            memO = self.out_memory
            memI = self.in_memory

            # Search for the arg address in the input memory
            # If it is found, change its value
            for (offset, seg), mem in memI.iteritems():
                if seg == stackSeg:
                    if offset <= argAddr < offset + mem.size:
                        argFoundInSnapshot = True
                        data = mem.data
                        mem.data = data[0:argAddr - offset] + struct.pack(
                            '@P',
                            newValue) + data[self._ptr_size + argAddr - offset:]
                        mem.size = len(mem.data)

            # If the argument is found, we remove it from the output memory
            # Because it will be allocated as an argument by the test itself
            if argFoundInSnapshot:
                for (offset, seg) in memO.keys():
                    addr = (offset, seg)
                    if seg == stackSeg:
                        if offset <= argAddr < offset + memO[addr].size:
                            # Keep what follows the argument as a new bloc
                            if argAddr + self._ptr_size < offset + memO[
                                    addr].size:
                                memaccess = MemoryAccess(
                                    memO[addr].size -
                                    (argAddr + self._ptr_size - offset),
                                    memO[addr].data[argAddr - offset:argAddr +
                                                    self._ptr_size - offset],
                                    memO[addr].access, stackSeg)
                                memO[(argAddr + self._ptr_size,
                                      stackSeg)] = memaccess
                            # Keep what precedes the argument, if anything
                            if argAddr > offset:
                                memO[addr].size = argAddr - offset
                            else:
                                del memO[addr]

            return argFoundInSnapshot
def _build_depgraph(self, depnode):
    """Recursively build the dependency DiGraph rooted at @depnode
    @depnode: starting node
    Return a DiGraph holding @depnode, an edge from each of its cached
    dependencies to it, and the nodes and edges of their own graphs
    """
    has_dependencies = depnode in self._cache and self._cache[depnode]
    if not has_dependencies:
        # No dependency: the graph is reduced to the node itself
        leaf_graph = DiGraph()
        leaf_graph.add_node(depnode)
        return leaf_graph

    # Recursion: subgraphs[i] is the graph built for dependencies[i]
    dependencies = list(self._cache[depnode])
    subgraphs = [self._build_depgraph(sub_depnode)
                 for sub_depnode in dependencies]

    merged = DiGraph()
    merged.add_node(depnode)
    for head in dependencies:
        merged.add_uniq_edge(head, depnode)

    # Fold every sub graph into the result
    for sourcegraph in subgraphs:
        for node in sourcegraph.nodes():
            merged.add_node(node)
        for edge in sourcegraph.edges():
            merged.add_uniq_edge(*edge)

    return merged
class ira:
    """IR analysis helpers: block graph generation, reach analysis and dead
    code removal.

    The methods below rely on attributes and methods that are not defined in
    this block (arch, IRDst, blocs, symbol_pool, ExprIsLabel, get_rw,
    get_out_regs, simplify_blocs); they are expected to be provided elsewhere.
    """

    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        """Dispatch the pending destination expressions of @todo

        @todo: set of expressions; it is emptied by this call
        @done: set receiving labels, ExprMem, ExprInt and any expression of
               an unhandled kind
        ExprCond are expanded: their src1 and src2 are pushed back in @todo.
        Return the set of remaining ExprId (which are not labels).
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst) or isinstance(dst, (ExprMem, ExprInt)):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Walk the assignment sets of @b backwards to resolve its destination

        @b: irbloc instance (its dst and irs attributes are read)
        Starting from b.dst, each ExprId still pending is replaced by the
        source of the instruction assigning it, going from the last
        assignment set to the first.
        Return the set of expressions classified as done by sort_dst; ids
        still pending once every assignment set has been visited are dropped.
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            follow = set()
            for i in irs:
                if not out:
                    break
                # Ids resolved by *this* instruction only.  Accumulating them
                # across instructions and calling out.remove() again on an id
                # already removed used to raise KeyError as soon as two ids
                # were tracked at once.
                found = set(o for o in out if i.dst == o)
                if found:
                    follow.add(i.src)
                    out.difference_update(found)

            # Ids not assigned in this set keep being tracked
            follow.update(out)
            todo = follow

        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # Turn a constant destination into its label
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script

        Nodes are named after hash(label); only labels present in self.blocs
        get a node description.
        """
        out = ["""
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
    """]
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                ir_txt.extend(str(l) for l in irs)
                # Empty entry between two assignment sets
                ir_txt.append("")
            ir_txt.append("")
            # Separator: backslash, 'l', backslash, newline
            all_lbls[hash(lbl)] = "\\l\\\n".join(ir_txt)

        for l, v in all_lbls.items():
            out.append('%s [label="%s"];\n' % (l, v))
        for a, b in self.g.edges():
            out.append('%s -> %s;\n' % (hash(a), hash(b)))
        out.append('}')
        return "".join(out)

    def remove_dead_instr(self, irb, useful):
        """Remove dead affectations using previous reaches analysis
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            # Only affectations to an ExprId can be dropped
            kept = [instr for instr in ir
                    if not (isinstance(instr.dst, ExprId) and
                            (irb.label, k, instr) not in useful)]
            if len(kept) != len(ir):
                # Update in place: the list object belongs to the block
                ir[:] = kept
                modified = True
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of intial useful instructions
        """
        useful = set()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.g.successors(node)
            has_son = bool(successors)
            if any(p_son not in self.blocs for p_son in successors):
                # Leaf has lost its son: don't remove anything
                # reaching this block
                for r in self.ira_regs_ids():
                    useful.update(block.cur_reach[-1][r].union(
                        block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r]
                                  if block.defout[-1][r]
                                  else block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)
        """
        useful = self.init_useful_instr()
        worklist = useful.copy()
        regs_ids = self.ira_regs_ids()

        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(regs_ids):
                killed = set() if instr_defout[reg] else cur_kill[reg]
                worklist.update(
                    cur_reach[reg].difference(useful).difference(killed))
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the
        reach analysis.
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            # Every block must be processed: no short-circuit here
            modified |= self.remove_dead_instr(block, useful)
        return modified

    def set_dead_regs(self, b):
        """Placeholder: does nothing"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing"""
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print(' (%s, %s, %s)' % p)

    def dump_bloc_state(self, irb):
        """Print, for each instruction of @irb, the non-empty reach, kill and
        defout sets of its assignment set
        @irb: irbloc instance
        """
        print('*' * 80)
        regs_ids = self.ira_regs_ids()
        for k, irs in enumerate(irb.irs):
            for instr in irs:
                print(5 * "-")
                print(' '.join(('instr', str(k), str(instr))))
                print(5 * "-")
                for v in regs_ids:
                    if irb.cur_reach[k][v]:
                        print('REACH[%d][%s]' % (k, v))
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print('KILL[%d][%s]' % (k, v))
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print('DEFOUT[%d][%s]' % (k, v))
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block
        @irb: irbloc instance
        PRE: init_reach()
        """
        regs_ids = self.ira_regs_ids()

        # Work on a copy so that a change of the entry state can be detected
        reach_block = {key: value.copy()
                       for key, value in irb.cur_reach[0].items()}

        # Compute reach from predecessors
        for n_pred in self.g.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in regs_ids:
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(
                        p_block.cur_kill[-1][c_reg]))
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in regs_ids:
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(
                        reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        for i in range(1, len(irb.irs)):
            for c_reg in regs_ids:
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(
                        irb.cur_kill[i - 1][c_reg]))
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(
                        irb.cur_reach[i][c_reg].difference(
                            irb.defout[i][c_reg]))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""
        fixed = True
        for node in self.g.nodes():
            if node in self.blocs:
                irb = self.blocs[node]
                if (irb.cur_reach != irb.prev_reach or
                        irb.cur_kill != irb.prev_kill):
                    fixed = False
                    # Shallow copies: the per-statement dicts and their sets
                    # stay shared with cur_reach / cur_kill.
                    # NOTE(review): in-place set updates are thus invisible
                    # to the comparison above; detection relies on
                    # compute_reach_block rebinding cur_reach[0].
                    irb.prev_reach = irb.cur_reach[:]
                    irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            fixed_point = self._test_kill_reach_fix()

    def dead_simp(self):
        """
        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function
        graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Liveness step
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each block by the equations obtained from
        its symbolic execution

        For every block, each register of arch.regs.all_regs_ids is bound to
        a fresh ExprId flagged is_term, the block is emulated, and one
        ExprAff is kept per symbol whose final value differs from its initial
        one. The block is printed before its irs and lines are overwritten.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x

            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)

            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                # Skip symbols left untouched by the block
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))

            print('*' * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")