class ira:
    """Analysis helpers over a set of IR blocs: flow graph generation,
    liveness analysis and removal of dead assignments.

    Several names used below are not defined in this class (self.blocs,
    self.symbol_pool, self.arch, self.IRDst, self.ExprIsLabel, self.get_rw,
    self.get_out_regs, self.simplify_blocs); presumably they are provided by
    the class this one is combined with -- TODO confirm.
    """

    def sort_dst(self, todo, done):
        """Classify the pending destination expressions of @todo
        @todo: set of expressions to classify; it is emptied by this call
        @done: set receiving the final destinations: labels, ExprMem,
               ExprInt and any expression of a kind not handled here
        Return the set of ExprId (which are not labels) met in @todo; they
        still have to be tracked back to their assignment.
        An ExprCond is expanded: its src1 and src2 are pushed back in @todo.
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, (ExprMem, ExprInt)):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of a bloc back through its assignments,
        walking the irs lists from the last one to the first one
        @b: irbloc instance (b.dst and b.irs are read)
        Return the set of final destinations gathered by sort_dst.
        Identifiers still unresolved once every irs has been walked are
        dropped.
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # found grows over the whole irs: a difference is idempotent
                # whereas set.remove() raises KeyError on an element which
                # has already been removed
                out.difference_update(found)

            # Identifiers not assigned in this irs are searched further up
            follow.update(out)
            todo = follow

        # Classify what is left once every irs has been walked
        self.sort_dst(todo, done)
        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # An integer destination is turned into the label
                    # attached to this offset
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        chunks = [' digraph asm_graph { size="80,50"; '
                  'node [ fontsize = "16", shape = "box" ]; ']

        # One node per known bloc, named after the hash of its label
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                ir_txt.extend(str(l) for l in irs)
                ir_txt.append("")
            ir_txt.append("")
            # "\l" is the graphviz left-justified line break
            all_lbls[hash(lbl)] = "\\l\\\n".join(ir_txt)

        for l, v in all_lbls.items():
            chunks.append('%s [label="%s"];\n' % (l, v))
        for a, b in self.g.edges():
            chunks.append('%s -> %s;\n' % (hash(a), hash(b)))
        chunks.append('}')
        return "".join(chunks)

    def remove_dead(self, irb):
        """Remove dead affectations using previous liveness analysis
        @irb: irbloc instance
        Return True iff the bloc state has changed
        PRE: compute_in_out(@irb)
        """
        modified = False
        for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out):
            # Walk backwards so that a deletion does not shift the indexes
            # which remain to be visited
            for j in reversed(range(len(ir))):
                i_cur = ir[j]
                keep = (
                    # only assignments to identifiers can be removed
                    not isinstance(i_cur.dst, ExprId)
                    # never delete irdst
                    or i_cur.dst == self.IRDst
                    # /!\ never remove ir calls
                    or (isinstance(i_cur.src, ExprOp)
                        and i_cur.src.op.startswith('call'))
                    # the written identifier is live after this irs
                    or i_cur.dst in c_out)
                if not keep:
                    del ir[j]
                    modified = True
        return modified

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc
        Return True iff one of the bloc state has changed
        """
        modified = False
        for b in self.blocs.values():
            modified |= self.remove_dead(b)
        return modified

    # for test XXX TODO
    def set_dead_regs(self, b):
        """Placeholder: does nothing"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing"""
        pass

    def dump_bloc_state(self, irb):
        """Print, for each irs of a bloc, its instructions and its R, W, IN
        and OUT sets
        @irb: irbloc instance
        """
        print('*' * 80)
        for i, (ir, c_in, c_out) in enumerate(
                zip(irb.irs, irb.c_in, irb.c_out)):
            print('ir')
            for x in ir:
                print('\t%s' % (x,))
            print('R %s' % ([str(x) for x in irb.r[i]],))
            print('W %s' % ([str(x) for x in irb.w[i]],))
            print('IN %s' % ([str(x) for x in c_in],))
            print('OUT %s' % ([str(x) for x in c_out],))

    def compute_in_out(self, irb):
        """Liveness computation for a single bloc
        @irb: irbloc instance
        Return True iff bloc state has changed

        Equations applied on each entry i of the bloc:
            IN[i] = R[i] U (OUT[i] \\ W[i])
            OUT[i] = IN[i + 1]
        The OUT set of the last entry is the union of the IN[0] sets of the
        sons of the bloc.
        """
        modified = False

        # Compute OUT for last irb entry
        c_out = set()
        has_son = False
        for n_son in self.g.successors(irb.label):
            has_son = True
            if n_son not in self.blocs:
                # The son is not a known bloc: use the set prepared by
                # fill_missing_son_c_in() in place of its IN set
                son_c_in = irb.c_out_missing
            else:
                son_c_in = self.blocs[n_son].c_in[0]
            c_out.update(son_c_in)
        if not has_son:
            # Special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(irb)

        if irb.c_out[-1] != c_out:
            irb.c_out[-1] = c_out
            modified = True

        # Compute out/in intra bloc
        last = len(irb.irs) - 1
        for i in reversed(range(len(irb.irs))):
            new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
            if irb.c_in[i] != new_in:
                irb.c_in[i] = new_in
                modified = True

            if i >= last:
                # Last out has been previously updated
                continue
            new_out = set(irb.c_in[i + 1])
            if irb.c_out[i] != new_out:
                irb.c_out[i] = new_out
                modified = True

        return modified

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis
        The l_in/l_out snapshots of every modified bloc are refreshed.
        """
        fixed = True
        for node in self.g.nodes():
            if node not in self.blocs:
                # leaf has lost her son
                continue
            irb = self.blocs[node]
            if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
                fixed = False
                irb.l_in = [set(x) for x in irb.c_in]
                irb.l_out = [set(x) for x in irb.c_out]
        return fixed

    def fill_missing_son_c_in(self):
        """Find nodes with missing sons in graph, and add virtual link to all
        written variables of all parents.
        PRE: gen_graph() and get_rw()"""
        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            self.blocs[node].c_out_missing = set()
            if all(node_son in self.blocs
                   for node_son in self.g.successors(node)):
                continue
            # At least one son is unknown: gather every variable written by
            # the blocs returned by reachable_parents()
            for parent in self.g.reachable_parents(node):
                irb = self.blocs[parent]
                for var_w in irb.w:
                    self.blocs[node].c_out_missing.update(var_w)

    def compute_dead(self):
        """Iterate liveness analysis until a fixed point is reached.
        PRE: gen_graph()
        """
        it = 0
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            log.debug(it)
            it += 1
            for n in self.g.nodes():
                if n not in self.blocs:
                    # leaf has lost her son
                    continue
                self.compute_in_out(self.blocs[n])
            fixed_point = self.test_in_out_fix()

    def dead_simp(self):
        """This function is used to analyse relation of a * complete function *
        This mean the blocs under study represent a solid full function graph.

        Ref: CS 5470 Compiler Techniques and Principles (Liveness
        analysis/Dataflow equations)

        PRE: call to gen_graph
        """
        modified = True
        while modified:
            log.debug('dead_simp step')

            # Update r/w variables for all irblocs
            self.get_rw()
            # Fill c_in for missing sons
            self.fill_missing_son_c_in()

            # Liveness step
            self.compute_dead()
            modified = self.remove_blocs_dead()

        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each bloc by a single list of assignments
        The bloc is symbolically executed from a state in which each register
        is bound to an ExprId of the same name and size; only the symbols
        whose final value differs from this initial binding are kept. Each
        bloc is printed once it has been executed.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    # Unchanged register
                    continue
                eqs.append(ExprAff(n_w, v))
            print('*' * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
class ira:
    """Analysis helpers over a set of IR blocs: flow graph generation,
    liveness analysis and removal of dead assignments.

    Several names used below are not defined in this class (self.blocs,
    self.symbol_pool, self.arch, self.ExprIsLabel, self.get_rw,
    self.get_out_regs, self.simplify_blocs); presumably they are provided by
    the class this one is combined with -- TODO confirm.
    """

    def sort_dst(self, todo, done):
        """Classify the pending destination expressions of @todo
        @todo: set of expressions to classify; it is emptied by this call
        @done: set receiving the final destinations: labels, ExprMem,
               ExprInt and any expression of a kind not handled here
        Return the set of ExprId (which are not labels) met in @todo; they
        still have to be tracked back to their assignment.
        An ExprCond is expanded: its src1 and src2 are pushed back in @todo.
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, (ExprMem, ExprInt)):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of a bloc back through its assignments,
        walking the irs lists from the last one to the first one
        @b: irbloc instance (b.dst and b.irs are read)
        Return the set of final destinations gathered by sort_dst.
        Identifiers still unresolved once every irs has been walked are
        dropped.
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # found grows over the whole irs: a difference is idempotent
                # whereas set.remove() raises KeyError on an element which
                # has already been removed
                out.difference_update(found)

            # Identifiers not assigned in this irs are searched further up
            follow.update(out)
            todo = follow

        # Classify what is left once every irs has been walked
        self.sort_dst(todo, done)
        return done

    def gen_graph(self, link_all=False):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # An integer destination is turned into the label
                    # attached to this offset
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        chunks = [' digraph asm_graph { size="80,50"; '
                  'node [ fontsize = "16", shape = "box" ]; ']

        # Nodes are named after the hash of their label rather than its
        # id(): two equal labels held by distinct objects must give the same
        # name, otherwise an edge points to a node which was never declared
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                ir_txt.extend(str(l) for l in irs)
                ir_txt.append("")
            ir_txt.append("")
            # "\l" is the graphviz left-justified line break
            all_lbls[hash(lbl)] = "\\l\\\n".join(ir_txt)

        for l, v in all_lbls.items():
            chunks.append('%s [label="%s"];\n' % (l, v))
        for a, b in self.g.edges():
            chunks.append('%s -> %s;\n' % (hash(a), hash(b)))
        chunks.append('}')
        return "".join(chunks)

    def remove_dead(self, b):
        """Remove dead affectations using previous liveness analysis
        @b: irbloc instance
        An assignment is removed when it writes an ExprId which is not in
        the OUT set of its irs, unless its source is a 'call' operation.
        PRE: compute_in_out(@b)
        """
        for ir, _, c_out in zip(b.irs, b.c_in, b.c_out):
            # Walk backwards so that a deletion does not shift the indexes
            # which remain to be visited
            for j in reversed(range(len(ir))):
                i_cur = ir[j]
                keep = (
                    # only assignments to identifiers can be removed
                    not isinstance(i_cur.dst, ExprId)
                    # /!\ never remove ir calls
                    or (isinstance(i_cur.src, ExprOp)
                        and i_cur.src.op.startswith('call'))
                    # the written identifier is live after this irs
                    or i_cur.dst in c_out)
                if not keep:
                    del ir[j]

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc"""
        for b in self.blocs.values():
            self.remove_dead(b)

    # for test XXX TODO
    def set_dead_regs(self, b):
        """Placeholder: does nothing"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing"""
        pass

    def compute_in_out(self, b):
        """Liveness computation for a single bloc
        @b: irbloc instance
        Return True iff bloc state has changed

        Equations applied on each entry i of the bloc:
            IN[i] = R[i] U (OUT[i] \\ W[i])
            OUT[i] = IN[i + 1]
        The OUT set of the last entry is the union of the IN[0] sets of the
        sons of the bloc.
        """
        modified = False

        # set b in
        new_in = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1])))
        if b.c_in[-1] != new_in:
            modified = True
        b.c_in[-1] = new_in

        # set b out
        c_out = set()
        has_son = False
        for n_son in self.g.successors(b.label):
            has_son = True
            if n_son not in self.blocs:
                print("leaf has lost her sons!")
                continue
            c_out.update(self.blocs[n_son].c_in[0])
        if not has_son:
            # special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(b)
        new_out = set(c_out)
        if b.c_out[-1] != new_out:
            modified = True
        b.c_out[-1] = new_out

        # get out/in for bloc
        # NOTE(review): c_in[i + 1] is read for i == len(b.irs) - 1, which
        # assumes c_in holds one entry more than b.irs -- confirm in get_rw()
        for i in reversed(range(len(b.irs))):
            new_in = set(b.r[i].union(b.c_out[i].difference(b.w[i])))
            if b.c_in[i] != new_in:
                modified = True
            b.c_in[i] = new_in

            new_out = set(b.c_in[i + 1])
            if b.c_out[i] != new_out:
                modified = True
            b.c_out[i] = new_out
        return modified

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis
        The l_in/l_out snapshots of every modified bloc are refreshed.
        """
        fixed = True
        for n in self.g.nodes():
            if n not in self.blocs:
                # leaf has lost her son
                continue
            b = self.blocs[n]
            if b.c_in != b.l_in or b.c_out != b.l_out:
                fixed = False
                b.l_in = [set(x) for x in b.c_in]
                b.l_out = [set(x) for x in b.c_out]
        return fixed

    def compute_dead(self):
        """Update the r/w sets of the blocs, then iterate liveness analysis
        until a fixed point is reached. The iteration numbers are written on
        the standard output, on a single line.
        PRE: gen_graph()
        """
        import sys

        self.get_rw()
        it = 0
        fixed_point = False
        sys.stdout.write('iteration...')
        while not fixed_point:
            sys.stdout.write(' %d' % it)
            it += 1
            for n in self.g.nodes():
                if n not in self.blocs:
                    # leaf has lost her son
                    continue
                self.compute_in_out(self.blocs[n])
            fixed_point = self.test_in_out_fix()
        sys.stdout.write('\n')

    def dead_simp(self):
        """Run the liveness analysis, remove the dead assignments, then
        simplify the expressions of the blocs
        PRE: gen_graph()
        """
        self.compute_dead()
        self.remove_blocs_dead()
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each bloc by a single list of assignments
        The bloc is symbolically executed from a state in which each register
        is bound to an ExprId of the same name and size; only the symbols
        whose final value differs from this initial binding are kept. Each
        bloc is printed once it has been executed.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    # Unchanged register
                    continue
                eqs.append(ExprAff(n_w, v))
            print('*' * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
class ira:
    """Analysis helpers over a set of IR blocs: flow graph generation,
    reaching definitions analysis and removal of dead instructions.

    Several names used below are not defined in this class (self.blocs,
    self.symbol_pool, self.arch, self.IRDst, self.ExprIsLabel, self.get_rw,
    self.get_out_regs, self.simplify_blocs); presumably they are provided by
    the class this one is combined with -- TODO confirm.
    """

    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        """Classify the pending destination expressions of @todo
        @todo: set of expressions to classify; it is emptied by this call
        @done: set receiving the final destinations: labels, ExprMem,
               ExprInt and any expression of a kind not handled here
        Return the set of ExprId (which are not labels) met in @todo; they
        still have to be tracked back to their assignment.
        An ExprCond is expanded: its src1 and src2 are pushed back in @todo.
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, (ExprMem, ExprInt)):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of a bloc back through its assignments,
        walking the irs lists from the last one to the first one
        @b: irbloc instance (b.dst and b.irs are read)
        Return the set of final destinations gathered by sort_dst.
        Identifiers still unresolved once every irs has been walked are
        dropped.
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # found grows over the whole irs: a difference is idempotent
                # whereas set.remove() raises KeyError on an element which
                # has already been removed
                out.difference_update(found)

            # Identifiers not assigned in this irs are searched further up
            follow.update(out)
            todo = follow

        # Classify what is left once every irs has been walked: without this
        # step a destination resolved in the first irs (or the destination
        # of a bloc without irs) never reaches done
        self.sort_dst(todo, done)
        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # An integer destination is turned into the label
                    # attached to this offset
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        chunks = [' digraph asm_graph { size="80,50"; '
                  'node [ fontsize = "16", shape = "box" ]; ']

        # One node per known bloc, named after the hash of its label
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                ir_txt.extend(str(l) for l in irs)
                ir_txt.append("")
            ir_txt.append("")
            # "\l" is the graphviz left-justified line break
            all_lbls[hash(lbl)] = "\\l\\\n".join(ir_txt)

        for l, v in all_lbls.items():
            chunks.append('%s [label="%s"];\n' % (l, v))
        for a, b in self.g.edges():
            chunks.append("%s -> %s;\n" % (hash(a), hash(b)))
        chunks.append("}")
        return "".join(chunks)

    def remove_dead_instr(self, irb, useful):
        """Remove dead affectations using previous reaches analysis
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            # Walk backwards so that a deletion does not shift the indexes
            # which remain to be visited
            for j in reversed(range(len(ir))):
                cur_instr = ir[j]
                if (isinstance(cur_instr.dst, ExprId)
                        and (irb.label, k, cur_instr) not in useful):
                    del ir[j]
                    modified = True
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of initial useful instructions
        """
        useful = set()
        regs_ids = self.ira_regs_ids()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.g.successors(node)
            # NOTE(review): assumes successors() returns a sequence; an
            # iterator would always be true here -- confirm in DiGraph
            has_son = bool(successors)
            for p_son in successors:
                if p_son not in self.blocs:
                    # Leaf has lost its son: don't remove anything
                    # reaching this block
                    for r in regs_ids:
                        useful.update(block.cur_reach[-1][r].union(
                            block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.src.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r]
                                  if block.defout[-1][r] else
                                  block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)
        """
        useful = self.init_useful_instr()
        worklist = useful.copy()
        regs_ids = self.ira_regs_ids()

        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(regs_ids):
                worklist.update(
                    cur_reach[reg].difference(useful).difference(
                        cur_kill[reg] if not instr_defout[reg] else set()))
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the
        reach analysis.
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            modified |= self.remove_dead_instr(block, useful)
        return modified

    def set_dead_regs(self, b):
        """Placeholder: does nothing"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing"""
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print(" (%s, %s, %s)" % p)

    def dump_bloc_state(self, irb):
        """Print, for each instruction of a bloc, the non empty REACH, KILL
        and DEFOUT sets of its irs, register by register
        @irb: irbloc instance
        """
        print("*" * 80)
        regs_ids = self.ira_regs_ids()
        for k, irs in enumerate(irb.irs):
            for instr in irs:
                print(5 * "-")
                print("instr %s %s" % (k, instr))
                print(5 * "-")
                for v in regs_ids:
                    if irb.cur_reach[k][v]:
                        print("REACH[%d][%s]" % (k, v))
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print("KILL[%d][%s]" % (k, v))
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print("DEFOUT[%d][%s]" % (k, v))
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block
        @irb: irbloc instance
        PRE: init_reach()
        """
        regs_ids = self.ira_regs_ids()

        # Work on a copy so that a change can be detected afterwards
        reach_block = {key: value.copy()
                       for key, value in irb.cur_reach[0].items()}

        # Compute reach from predecessors
        for n_pred in self.g.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in regs_ids:
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(
                        p_block.cur_kill[-1][c_reg]))
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in regs_ids:
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(
                        reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        for i in range(1, len(irb.irs)):
            for c_reg in regs_ids:
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(
                        irb.cur_kill[i - 1][c_reg]))
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(
                        irb.cur_reach[i][c_reg].difference(
                            irb.defout[i][c_reg]))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""
        fixed = True
        for node in self.g.nodes():
            if node in self.blocs:
                irb = self.blocs[node]
                if (irb.cur_reach != irb.prev_reach or
                        irb.cur_kill != irb.prev_kill):
                    fixed = False
                    # NOTE(review): shallow copies; the per-register sets
                    # stay shared with cur_reach/cur_kill, so an in-place
                    # update of these sets is not seen by the comparison
                    # above. Only the replacement of cur_reach[0] done in
                    # compute_reach_block() is detected -- confirm this is
                    # sufficient
                    irb.prev_reach = irb.cur_reach[:]
                    irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        fixed_point = False
        log.debug("iteration...")
        while not fixed_point:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            fixed_point = self._test_kill_reach_fix()

    def dead_simp(self):
        """
        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function
        graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Reach analysis step
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each bloc by a single list of assignments
        The bloc is symbolically executed from a state in which each register
        is bound to an ExprId of the same name and size; only the symbols
        whose final value differs from this initial binding are kept. Each
        bloc is printed once it has been executed.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    # Unchanged register
                    continue
                eqs.append(ExprAff(n_w, v))
            print("*" * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
class ira:
    """Analysis helpers over a set of IR blocs: flow graph generation,
    liveness analysis and removal of dead assignments.

    Several names used below are not defined in this class (self.blocs,
    self.symbol_pool, self.arch, self.ExprIsLabel, self.get_rw,
    self.get_out_regs, self.simplify_blocs); presumably they are provided by
    the class this one is combined with -- TODO confirm.
    """

    def sort_dst(self, todo, done):
        """Classify the pending destination expressions of @todo
        @todo: set of expressions to classify; it is emptied by this call
        @done: set receiving the final destinations: labels, ExprMem,
               ExprInt and any expression of a kind not handled here
        Return the set of ExprId (which are not labels) met in @todo; they
        still have to be tracked back to their assignment.
        An ExprCond is expanded: its src1 and src2 are pushed back in @todo.
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, (ExprMem, ExprInt)):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of a bloc back through its assignments,
        walking the irs lists from the last one to the first one
        @b: irbloc instance (b.dst and b.irs are read)
        Return the set of final destinations gathered by sort_dst.
        Identifiers still unresolved once every irs has been walked are
        dropped.
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # found grows over the whole irs: a difference is idempotent
                # whereas set.remove() raises KeyError on an element which
                # has already been removed
                out.difference_update(found)

            # Identifiers not assigned in this irs are searched further up
            follow.update(out)
            todo = follow

        # Classify what is left once every irs has been walked
        self.sort_dst(todo, done)
        return done

    def gen_graph(self, link_all=False):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # An integer destination is turned into the label
                    # attached to this offset
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        chunks = [' digraph asm_graph { size="80,50"; '
                  'node [ fontsize = "16", shape = "box" ]; ']

        # Nodes are named after the hash of their label rather than its
        # id(): two equal labels held by distinct objects must give the same
        # name, otherwise an edge points to a node which was never declared
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                ir_txt.extend(str(l) for l in irs)
                ir_txt.append("")
            ir_txt.append("")
            # "\l" is the graphviz left-justified line break
            all_lbls[hash(lbl)] = "\\l\\\n".join(ir_txt)

        for l, v in all_lbls.items():
            chunks.append('%s [label="%s"];\n' % (l, v))
        for a, b in self.g.edges():
            chunks.append('%s -> %s;\n' % (hash(a), hash(b)))
        chunks.append('}')
        return "".join(chunks)

    def remove_dead(self, b):
        """Remove dead affectations using previous liveness analysis
        @b: irbloc instance
        An assignment is removed when it writes an ExprId which is not in
        the OUT set of its irs, unless its source is a 'call' operation.
        PRE: compute_in_out(@b)
        """
        for ir, _, c_out in zip(b.irs, b.c_in, b.c_out):
            # Walk backwards so that a deletion does not shift the indexes
            # which remain to be visited
            for j in reversed(range(len(ir))):
                i_cur = ir[j]
                keep = (
                    # only assignments to identifiers can be removed
                    not isinstance(i_cur.dst, ExprId)
                    # /!\ never remove ir calls
                    or (isinstance(i_cur.src, ExprOp)
                        and i_cur.src.op.startswith('call'))
                    # the written identifier is live after this irs
                    or i_cur.dst in c_out)
                if not keep:
                    del ir[j]

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc"""
        for b in self.blocs.values():
            self.remove_dead(b)

    # for test XXX TODO
    def set_dead_regs(self, b):
        """Placeholder: does nothing"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing"""
        pass

    def compute_in_out(self, b):
        """Liveness computation for a single bloc
        @b: irbloc instance
        Return True iff bloc state has changed

        Equations applied on each entry i of the bloc:
            IN[i] = R[i] U (OUT[i] \\ W[i])
            OUT[i] = IN[i + 1]
        The OUT set of the last entry is the union of the IN[0] sets of the
        sons of the bloc.
        """
        modified = False

        # set b in
        new_in = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1])))
        if b.c_in[-1] != new_in:
            modified = True
        b.c_in[-1] = new_in

        # set b out
        c_out = set()
        has_son = False
        for n_son in self.g.successors(b.label):
            has_son = True
            if n_son not in self.blocs:
                print("leaf has lost her sons!")
                continue
            c_out.update(self.blocs[n_son].c_in[0])
        if not has_son:
            # special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(b)
        new_out = set(c_out)
        if b.c_out[-1] != new_out:
            modified = True
        b.c_out[-1] = new_out

        # get out/in for bloc
        # NOTE(review): c_in[i + 1] is read for i == len(b.irs) - 1, which
        # assumes c_in holds one entry more than b.irs -- confirm in get_rw()
        for i in reversed(range(len(b.irs))):
            new_in = set(b.r[i].union(b.c_out[i].difference(b.w[i])))
            if b.c_in[i] != new_in:
                modified = True
            b.c_in[i] = new_in

            new_out = set(b.c_in[i + 1])
            if b.c_out[i] != new_out:
                modified = True
            b.c_out[i] = new_out
        return modified

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis
        The l_in/l_out snapshots of every modified bloc are refreshed.
        """
        fixed = True
        for n in self.g.nodes():
            if n not in self.blocs:
                # leaf has lost her son
                continue
            b = self.blocs[n]
            if b.c_in != b.l_in or b.c_out != b.l_out:
                fixed = False
                b.l_in = [set(x) for x in b.c_in]
                b.l_out = [set(x) for x in b.c_out]
        return fixed

    def compute_dead(self):
        """Update the r/w sets of the blocs, then iterate liveness analysis
        until a fixed point is reached. The iteration numbers are written on
        the standard output, on a single line.
        PRE: gen_graph()
        """
        import sys

        self.get_rw()
        it = 0
        fixed_point = False
        sys.stdout.write('iteration...')
        while not fixed_point:
            sys.stdout.write(' %d' % it)
            it += 1
            for n in self.g.nodes():
                if n not in self.blocs:
                    # leaf has lost her son
                    continue
                self.compute_in_out(self.blocs[n])
            fixed_point = self.test_in_out_fix()
        sys.stdout.write('\n')

    def dead_simp(self):
        """Run the liveness analysis, remove the dead assignments, then
        simplify the expressions of the blocs
        PRE: gen_graph()
        """
        self.compute_dead()
        self.remove_blocs_dead()
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each bloc by a single list of assignments
        The bloc is symbolically executed from a state in which each register
        is bound to an ExprId of the same name and size; only the symbols
        whose final value differs from this initial binding are kept. Each
        bloc is printed once it has been executed.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    # Unchanged register
                    continue
                eqs.append(ExprAff(n_w, v))
            print('*' * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
class ira:
    """Analysis helpers over a set of IR blocs: flow graph generation,
    liveness analysis and removal of dead assignments.

    Several names used below are not defined in this class (self.blocs,
    self.symbol_pool, self.arch, self.IRDst, self.ExprIsLabel, self.get_rw,
    self.get_out_regs, self.simplify_blocs); presumably they are provided by
    the class this one is combined with -- TODO confirm.
    """

    def sort_dst(self, todo, done):
        """Classify the pending destination expressions of @todo
        @todo: set of expressions to classify; it is emptied by this call
        @done: set receiving the final destinations: labels, ExprMem,
               ExprInt and any expression of a kind not handled here
        Return the set of ExprId (which are not labels) met in @todo; they
        still have to be tracked back to their assignment.
        An ExprCond is expanded: its src1 and src2 are pushed back in @todo.
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, (ExprMem, ExprInt)):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of a bloc back through its assignments,
        walking the irs lists from the last one to the first one
        @b: irbloc instance (b.dst and b.irs are read)
        Return the set of final destinations gathered by sort_dst.
        Identifiers still unresolved once every irs has been walked are
        dropped.
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # found grows over the whole irs: a difference is idempotent
                # whereas set.remove() raises KeyError on an element which
                # has already been removed
                out.difference_update(found)

            # Identifiers not assigned in this irs are searched further up
            follow.update(out)
            todo = follow

        # Classify what is left once every irs has been walked
        self.sort_dst(todo, done)
        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # An integer destination is turned into the label
                    # attached to this offset
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        chunks = [' digraph asm_graph { size="80,50"; '
                  'node [ fontsize = "16", shape = "box" ]; ']

        # One node per known bloc, named after the hash of its label
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                ir_txt.extend(str(l) for l in irs)
                ir_txt.append("")
            ir_txt.append("")
            # "\l" is the graphviz left-justified line break
            all_lbls[hash(lbl)] = "\\l\\\n".join(ir_txt)

        for l, v in all_lbls.items():
            chunks.append('%s [label="%s"];\n' % (l, v))
        for a, b in self.g.edges():
            chunks.append('%s -> %s;\n' % (hash(a), hash(b)))
        chunks.append('}')
        return "".join(chunks)

    def remove_dead(self, irb):
        """Remove dead affectations using previous liveness analysis
        @irb: irbloc instance
        Return True iff the bloc state has changed
        PRE: compute_in_out(@irb)
        """
        modified = False
        for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out):
            # Walk backwards so that a deletion does not shift the indexes
            # which remain to be visited
            for j in reversed(range(len(ir))):
                i_cur = ir[j]
                keep = (
                    # only assignments to identifiers can be removed
                    not isinstance(i_cur.dst, ExprId)
                    # never delete irdst
                    or i_cur.dst == self.IRDst
                    # /!\ never remove ir calls
                    or (isinstance(i_cur.src, ExprOp)
                        and i_cur.src.op.startswith('call'))
                    # the written identifier is live after this irs
                    or i_cur.dst in c_out)
                if not keep:
                    del ir[j]
                    modified = True
        return modified

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc
        Return True iff one of the bloc state has changed
        """
        modified = False
        for b in self.blocs.values():
            modified |= self.remove_dead(b)
        return modified

    # for test XXX TODO
    def set_dead_regs(self, b):
        """Placeholder: does nothing"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing"""
        pass

    def dump_bloc_state(self, irb):
        """Print, for each irs of a bloc, its instructions and its R, W, IN
        and OUT sets
        @irb: irbloc instance
        """
        print('*' * 80)
        for i, (ir, c_in, c_out) in enumerate(
                zip(irb.irs, irb.c_in, irb.c_out)):
            print('ir')
            for x in ir:
                print('\t%s' % (x,))
            print('R %s' % ([str(x) for x in irb.r[i]],))
            print('W %s' % ([str(x) for x in irb.w[i]],))
            print('IN %s' % ([str(x) for x in c_in],))
            print('OUT %s' % ([str(x) for x in c_out],))

    def compute_in_out(self, irb):
        """Liveness computation for a single bloc
        @irb: irbloc instance
        Return True iff bloc state has changed

        Equations applied on each entry i of the bloc:
            IN[i] = R[i] U (OUT[i] \\ W[i])
            OUT[i] = IN[i + 1]
        The OUT set of the last entry is the union of the IN[0] sets of the
        sons of the bloc.
        """
        modified = False

        # Compute OUT for last irb entry
        c_out = set()
        has_son = False
        for n_son in self.g.successors(irb.label):
            has_son = True
            if n_son not in self.blocs:
                # The son is not a known bloc: use the set prepared by
                # fill_missing_son_c_in() in place of its IN set
                son_c_in = irb.c_out_missing
            else:
                son_c_in = self.blocs[n_son].c_in[0]
            c_out.update(son_c_in)
        if not has_son:
            # Special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(irb)

        if irb.c_out[-1] != c_out:
            irb.c_out[-1] = c_out
            modified = True

        # Compute out/in intra bloc
        last = len(irb.irs) - 1
        for i in reversed(range(len(irb.irs))):
            new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
            if irb.c_in[i] != new_in:
                irb.c_in[i] = new_in
                modified = True

            if i >= last:
                # Last out has been previously updated
                continue
            new_out = set(irb.c_in[i + 1])
            if irb.c_out[i] != new_out:
                irb.c_out[i] = new_out
                modified = True

        return modified

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis
        The l_in/l_out snapshots of every modified bloc are refreshed.
        """
        fixed = True
        for node in self.g.nodes():
            if node not in self.blocs:
                # leaf has lost her son
                continue
            irb = self.blocs[node]
            if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
                fixed = False
                irb.l_in = [set(x) for x in irb.c_in]
                irb.l_out = [set(x) for x in irb.c_out]
        return fixed

    def fill_missing_son_c_in(self):
        """Find nodes with missing sons in graph, and add virtual link to all
        written variables of all parents.
        PRE: gen_graph() and get_rw()"""
        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            self.blocs[node].c_out_missing = set()
            if all(node_son in self.blocs
                   for node_son in self.g.successors(node)):
                continue
            # At least one son is unknown: gather every variable written by
            # the blocs returned by reachable_parents()
            for parent in self.g.reachable_parents(node):
                irb = self.blocs[parent]
                for var_w in irb.w:
                    self.blocs[node].c_out_missing.update(var_w)

    def compute_dead(self):
        """Iterate liveness analysis until a fixed point is reached.
        PRE: gen_graph()
        """
        it = 0
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            log.debug(it)
            it += 1
            for n in self.g.nodes():
                if n not in self.blocs:
                    # leaf has lost her son
                    continue
                self.compute_in_out(self.blocs[n])
            fixed_point = self.test_in_out_fix()

    def dead_simp(self):
        """This function is used to analyse relation of a * complete function *
        This mean the blocs under study represent a solid full function graph.

        Ref: CS 5470 Compiler Techniques and Principles (Liveness
        analysis/Dataflow equations)

        PRE: call to gen_graph
        """
        modified = True
        while modified:
            log.debug('dead_simp step')

            # Update r/w variables for all irblocs
            self.get_rw()
            # Fill c_in for missing sons
            self.fill_missing_son_c_in()

            # Liveness step
            self.compute_dead()
            modified = self.remove_blocs_dead()

        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each bloc by a single list of assignments
        The bloc is symbolically executed from a state in which each register
        is bound to an ExprId of the same name and size; only the symbols
        whose final value differs from this initial binding are kept. Each
        bloc is printed once it has been executed.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    # Unchanged register
                    continue
                eqs.append(ExprAff(n_w, v))
            print('*' * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
class ira:
    """IR analysis helpers: control flow graph generation and dead code
    removal driven by a reaching definition analysis.

    NOTE(review): blocs, arch, symbol_pool, IRDst, ExprIsLabel, get_rw,
    get_out_regs and simplify_blocs are used below but are not defined in
    this class; presumably they come from a sibling/base class - confirm.
    """

    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        """Classify pending destination expressions
        @todo: set of expressions to classify; emptied by this call
        @done: set updated in place with labels, ExprMem, ExprInt and any
               expression of another unhandled kind
        Return the set of non-label ExprId which still have to be tracked.
        ExprCond are expanded: both src1 and src2 are classified in turn.
        """
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, (ExprMem, ExprInt)):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Walk the instruction groups of @b backward to resolve its
        destination
        @b: irbloc instance (its dst and irs attributes are read)
        Return the set of expressions classified as final by sort_dst
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            follow = set()
            for i in irs:
                if not out:
                    break
                matched = set(o for o in out if i.dst == o)
                if matched:
                    follow.add(i.src)
                    # Set difference instead of remove(): an identifier
                    # resolved by a previous instruction of this group must
                    # not raise KeyError here
                    out -= matched
            # Identifiers not written by this group are searched in the
            # previous one
            follow.update(out)
            todo = follow

        # Classify what the earliest group produced; without this step the
        # expressions found there never reach done
        self.sort_dst(todo, done)
        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # Integer destination: use the label created for this
                    # offset
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        chunks = ["""
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
    """]
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            ir_txt = [str(lbl)]
            for irs in self.blocs[lbl].irs:
                ir_txt.extend(str(l) for l in irs)
                ir_txt.append("")
            ir_txt.append("")
            # Separator is: backslash, 'l', backslash, newline
            all_lbls[hash(lbl)] = "\\l\\\n".join(ir_txt)

        # Nodes are identified by the hash of their label
        for node_id, label in all_lbls.items():
            chunks.append('%s [label="%s"];\n' % (node_id, label))
        for a, b in self.g.edges():
            chunks.append('%s -> %s;\n' % (hash(a), hash(b)))
        chunks.append('}')
        return "".join(chunks)

    def remove_dead_instr(self, irb, useful):
        """Remove dead affectations using previous reaches analysis
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            kept = [instr for instr in ir
                    if not (isinstance(instr.dst, ExprId) and
                            (irb.label, k, instr) not in useful)]
            if len(kept) != len(ir):
                # Update in place: the list object held by irb.irs is kept
                ir[:] = kept
                modified = True
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of initial useful instructions
        """
        useful = set()
        regs_ids = self.ira_regs_ids()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            # Materialized once: it is tested for emptiness and iterated
            successors = list(self.g.successors(node))
            has_son = bool(successors)

            if any(p_son not in self.blocs for p_son in successors):
                # Leaf has lost its son: don't remove anything
                # reaching this block
                for r in regs_ids:
                    useful.update(block.cur_reach[-1][r].union(
                        block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r]
                                  if block.defout[-1][r] else
                                  block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)
        """
        useful = self.init_useful_instr()
        worklist = useful.copy()
        regs_ids = self.ira_regs_ids()

        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(regs_ids):
                # Killed definitions are only excluded when this instruction
                # group has no definition of reg
                excluded = set() if instr_defout[reg] else cur_kill[reg]
                worklist.update(
                    cur_reach[reg].difference(useful).difference(excluded))
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the
        reach analysis.
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            # No short-circuit: every block has to be cleaned
            if self.remove_dead_instr(block, useful):
                modified = True
        return modified

    def set_dead_regs(self, b):
        """Does nothing in this class
        @b: unused
        """
        pass

    def add_unused_regs(self):
        """Does nothing in this class"""
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print(' (%s, %s, %s)' % p)

    def dump_bloc_state(self, irb):
        """Print the non empty REACH, KILL and DEFOUT sets of each
        instruction of a block
        @irb: irbloc instance
        """
        regs_ids = self.ira_regs_ids()
        print('*' * 80)
        for k, irs in enumerate(irb.irs):
            for instr in irs:
                print(5 * "-")
                print('instr %s %s' % (k, instr))
                print(5 * "-")
                for v in regs_ids:
                    if irb.cur_reach[k][v]:
                        print('REACH[%d][%s]' % (k, v))
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print('KILL[%d][%s]' % (k, v))
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print('DEFOUT[%d][%s]' % (k, v))
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block
        @irb: irbloc instance
        PRE: init_reach()
        """
        regs_ids = self.ira_regs_ids()
        reach_block = dict((key, value.copy())
                           for key, value in irb.cur_reach[0].items())

        # Compute reach from predecessors
        for n_pred in self.g.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in regs_ids:
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(
                        p_block.cur_kill[-1][c_reg]))
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in regs_ids:
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(
                        reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        for i in range(1, len(irb.irs)):
            for c_reg in regs_ids:
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(
                        irb.cur_kill[i - 1][c_reg]))
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(
                        irb.cur_reach[i][c_reg].difference(
                            irb.defout[i][c_reg]))

    @staticmethod
    def _snapshot_sets(state):
        """Copy a list of {register: set} dictionaries
        @state: list of dictionaries whose values are sets
        Return a copy sharing no dictionary and no set with @state
        """
        return [dict((reg, defs.copy()) for reg, defs in per_instr.items())
                for per_instr in state]

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""
        fixed = True
        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            irb = self.blocs[node]
            if (irb.cur_reach != irb.prev_reach or
                    irb.cur_kill != irb.prev_kill):
                fixed = False
                # compute_reach_block() updates the inner sets in place: the
                # snapshot must not share them, or later changes would be
                # invisible to the comparison above
                irb.prev_reach = self._snapshot_sets(irb.cur_reach)
                irb.prev_kill = self._snapshot_sets(irb.cur_kill)
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            fixed_point = self._test_kill_reach_fix()

    def dead_simp(self):
        """
        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function
        graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis
        techniques. IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Reach analysis step
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the instructions of each block by the affectations
        obtained from its symbolic execution.
        Each block is printed, and its irs / lines attributes are
        overwritten.
        """
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                # Skip registers which still hold their initial symbol
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print('*' * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")