예제 #1
0
파일: asmbloc.py 프로젝트: avelik/miasm
class basicblocs:
    """Set of basic blocs indexed by label, together with their flow graph.

    Attributes:
      blocs: dict mapping each bloc label to its bloc
      g: DiGraph of bloc labels; an edge links a bloc to each of its
         destinations whose label is an asm_label
    """

    def __init__(self, ab=None):
        """Create the container and register the blocs of @ab.

        @ab: optional iterable of blocs (nothing is registered when omitted)
        """
        self.blocs = {}
        self.g = DiGraph()
        # A None sentinel replaces the former mutable default argument
        self.add_blocs([] if ab is None else ab)

    def add(self, b):
        """Register bloc @b and link it to its destinations in the graph."""
        self.blocs[b.label] = b
        self.g.add_node(b.label)
        for dst in b.bto:
            # Only destinations carrying an asm_label become edges
            if isinstance(dst.label, asm_label):
                self.g.add_edge(b.label, dst.label)

    def add_blocs(self, ab):
        """Register every bloc of the iterable @ab."""
        for b in ab:
            self.add(b)

    def get_bad_dst(self):
        """Return the set of blocs having at least one destination constraint
        whose type is asm_constraint.c_bad."""
        return set(b for b in self.blocs.values()
                   if any(c.c_t == asm_constraint.c_bad for c in b.bto))
예제 #2
0
def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb):
    for irblock in ir_arch.blocks.values():
        print irblock

    ir_arch.dead_simp()

    irblock_0 = None
    for irblock in ir_arch.blocks.values():
        if irblock.label.offset == ad:
            irblock_0 = irblock
            break
    assert (irblock_0 is not None)
    flow_graph = DiGraph()
    flow_graph.node2str = lambda n: node2str(flow_graph, n)

    for irblock in ir_arch.blocks.values():
        block_flow_cb(ir_arch, flow_graph, irblock)

    for irblock in ir_arch.blocks.values():
        print irblock
        print 'IN', [str(x) for x in irblock.in_nodes]
        print 'OUT', [str(x) for x in irblock.out_nodes]

    print '*' * 20, 'interblock', '*' * 20
    inter_bloc_flow(ir_arch, flow_graph, irblock_0.label)

    # from graph_qt import graph_qt
    # graph_qt(flow_graph)
    open('data.dot', 'w').write(flow_graph.dot())
예제 #3
0
def gen_bloc_data_flow_graph(ir_arch, ad, block_flow_cb):
    for irbloc in ir_arch.blocs.values():
        print irbloc

    ir_arch.gen_graph()
    ir_arch.dead_simp()

    irbloc_0 = None
    for irbloc in ir_arch.blocs.values():
        if irbloc.label.offset == ad:
            irbloc_0 = irbloc
            break
    assert(irbloc_0 is not None)
    flow_graph = DiGraph()
    flow_graph.node2str = lambda n: node2str(flow_graph, n)

    for irbloc in ir_arch.blocs.values():
        block_flow_cb(ir_arch, flow_graph, irbloc)

    for irbloc in ir_arch.blocs.values():
        print irbloc
        print 'IN', [str(x) for x in irbloc.in_nodes]
        print 'OUT', [str(x) for x in irbloc.out_nodes]

    print '*' * 20, 'interbloc', '*' * 20
    inter_bloc_flow(ir_arch, flow_graph, irbloc_0.label)

    # from graph_qt import graph_qt
    # graph_qt(flow_graph)
    open('data.dot', 'w').write(flow_graph.dot())
예제 #4
0
class basicblocs:
    """Blocs stored by label (self.blocs) plus the DiGraph of their labels
    (self.g)."""

    def __init__(self, ab=[]):
        """Start from an empty container, then register the blocs of @ab."""
        self.g = DiGraph()
        self.blocs = {}
        self.add_blocs(ab)

    def add(self, b):
        """Store bloc @b and add its node and outgoing edges to the graph."""
        self.blocs[b.label] = b
        self.g.add_node(b.label)
        for constraint in b.bto:
            if not isinstance(constraint.label, asm_label):
                # Destinations without an asm_label are not linked
                continue
            self.g.add_edge(b.label, constraint.label)

    def add_blocs(self, ab):
        """Call add() on each bloc of @ab."""
        for bloc in ab:
            self.add(bloc)

    def get_bad_dst(self):
        """Return the set of blocs owning a destination constraint whose
        c_t is asm_constraint.c_bad."""
        return set(bloc
                   for bloc in self.blocs.values()
                   for constraint in bloc.bto
                   if constraint.c_t == asm_constraint.c_bad)
예제 #5
0
파일: depgraph.py 프로젝트: 0xf1sh/miasm
    def as_graph(self, starting_nodes):
        """Merge the dependency graphs of every node of @starting_nodes.

        @starting_nodes: set of DependencyNode instance, used as leafs
        Return the resulting DiGraph
        """
        # One dependency subgraph per starting node
        partial_graphs = [self._build_depGraph(leaf)
                          for leaf in starting_nodes]

        # Fold every subgraph into a single DiGraph
        merged = DiGraph()
        for partial in partial_graphs:
            for vertex in partial.nodes():
                merged.add_node(vertex)
            for link in partial.edges():
                merged.add_uniq_edge(*link)
        return merged
예제 #6
0
 def as_graph(self):
     """Build a DiGraph from the recorded links and the pending ones"""
     dep_graph = DiGraph()
     for src, dst in self.links:
         if dst:
             dep_graph.add_edge(src, dst)
         else:
             # Link without a second node: only the node itself is kept
             dep_graph.add_node(src)
     for father, children in self.pending.iteritems():
         for child in children:
             dep_graph.add_edge(father, child)
     return dep_graph
예제 #7
0
 def gen_graph(self, link_all=True):
     """
     Rebuild self.g, the digraph of irblocs
     @link_all: when True, also add edges toward labels which are not in
     self.blocs
     """
     self.g = DiGraph()
     for label, irbloc in self.blocs.items():
         self.g.add_node(label)
         for target in self.dst_trackback(irbloc):
             if isinstance(target, ExprInt):
                 # Integer destination: wrap the symbol fetched (or created)
                 # for this offset in an ExprId
                 offset = int(target.arg)
                 target = ExprId(
                     self.symbol_pool.getby_offset_create(offset))
             if not self.ExprIsLabel(target):
                 continue
             if target.name in self.blocs or link_all is True:
                 self.g.add_edge(label, target.name)
예제 #8
0
    def __init__(self, segments, abicls, machine):
        """Initialise an empty state.

        @segments: stored as is in self.segments
        @abicls: stored as is in self.abicls
        @machine: name handed to Machine() to obtain the IR analysis instance
        """
        self.segments = segments
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self.refs = {}

        self._ira = Machine(machine).ira()
        # sizeof_pointer() is expressed in bits; explicit floor division keeps
        # the byte count an integer whatever the division semantics in use
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name
예제 #9
0
    def __init__(self, abicls, machine):
        """Initialise an empty state.

        @abicls: stored as is in self.abicls
        @machine: name handed to Machine() to obtain the IR analysis instance
        """
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        # sizeof_pointer() is expressed in bits; explicit floor division keeps
        # the byte count an integer whatever the division semantics in use
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name
예제 #10
0
def gen_bloc_data_flow_graph(
        ir_arch, in_str, ad):  # arch, attrib, pool_bin, bloc, symbol_pool):
    out_str = ""

    # ir_arch = ir_x86_32(symbol_pool)

    for irbloc in ir_arch.blocs.values():
        print irbloc

    ir_arch.gen_graph()
    ir_arch.dead_simp()

    irbloc_0 = None
    for irbloc in ir_arch.blocs.values():
        if irbloc.label.offset == ad:
            irbloc_0 = irbloc
            break
    assert (irbloc_0 is not None)
    flow_graph = DiGraph()
    flow_graph.node2str = lambda n: node2str(flow_graph, n)
    done = set()
    todo = set([irbloc_0.label])

    bloc2w = {}

    for irbloc in ir_arch.blocs.values():
        intra_bloc_flow_raw(ir_arch, flow_graph, irbloc)
        # intra_bloc_flow_symb(ir_arch, flow_graph, irbloc)

    for irbloc in ir_arch.blocs.values():
        print irbloc
        print 'IN', [str(x) for x in irbloc.in_nodes]
        print 'OUT', [str(x) for x in irbloc.out_nodes]

    print '*' * 20, 'interbloc', '*' * 20
    inter_bloc_flow(ir_arch, flow_graph, irbloc_0.label)

    # sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core')
    # from graph_qt import graph_qt
    # graph_qt(flow_graph)
    open('data.txt', 'w').write(flow_graph.dot())
예제 #11
0
def gen_bloc_data_flow_graph(ir_arch, in_str, ad):  # arch, attrib, pool_bin, bloc, symbol_pool):
    out_str = ""

    # ir_arch = ir_x86_32(symbol_pool)

    for irbloc in ir_arch.blocs.values():
        print irbloc

    ir_arch.gen_graph()
    ir_arch.dead_simp()

    irbloc_0 = None
    for irbloc in ir_arch.blocs.values():
        if irbloc.label.offset == ad:
            irbloc_0 = irbloc
            break
    assert(irbloc_0 is not None)
    flow_graph = DiGraph()
    flow_graph.node2str = lambda n: node2str(flow_graph, n)
    done = set()
    todo = set([irbloc_0.label])

    bloc2w = {}

    for irbloc in ir_arch.blocs.values():
        intra_bloc_flow_raw(ir_arch, flow_graph, irbloc)
        # intra_bloc_flow_symb(ir_arch, flow_graph, irbloc)

    for irbloc in ir_arch.blocs.values():
        print irbloc
        print 'IN', [str(x) for x in irbloc.in_nodes]
        print 'OUT', [str(x) for x in irbloc.out_nodes]

    print '*' * 20, 'interbloc', '*' * 20
    inter_bloc_flow(ir_arch, flow_graph, irbloc_0.label)

    # sys.path.append('/home/serpilliere/projet/m2_devel/miasm2/core')
    # from graph_qt import graph_qt
    # graph_qt(flow_graph)
    open('data.txt', 'w').write(flow_graph.dot())
예제 #12
0
def gen_block_data_flow_graph(ir_arch, ircfg, ad, block_flow_cb):
    for irblock in ircfg.blocks.values():
        print irblock

    dead_simp(ir_arch, ircfg)


    irblock_0 = None
    for irblock in ircfg.blocks.values():
        loc_key = irblock.loc_key
        offset = ircfg.loc_db.get_location_offset(loc_key)
        if offset == ad:
            irblock_0 = irblock
            break
    assert(irblock_0 is not None)
    flow_graph = DiGraph()
    flow_graph.node2str = lambda n: node2str(flow_graph, n)


    irb_in_nodes = {}
    irb_out_nodes = {}
    for label in ircfg.blocks:
        irb_in_nodes[label] = {}
        irb_out_nodes[label] = {}

    for label, irblock in ircfg.blocks.iteritems():
        block_flow_cb(ir_arch, ircfg, flow_graph, irblock, irb_in_nodes[label], irb_out_nodes[label])

    for label in ircfg.blocks:
        print label
        print 'IN', [str(x) for x in irb_in_nodes[label]]
        print 'OUT', [str(x) for x in irb_out_nodes[label]]

    print '*' * 20, 'interblock', '*' * 20
    inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes)

    # from graph_qt import graph_qt
    # graph_qt(flow_graph)
    open('data.dot', 'w').write(flow_graph.dot())
예제 #13
0
def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb):
    for irblock in ir_arch.blocks.values():
        print irblock

    dead_simp(ir_arch)

    irblock_0 = None
    for irblock in ir_arch.blocks.values():
        loc_key = irblock.loc_key
        offset = ir_arch.loc_db.get_location_offset(loc_key)
        if offset == ad:
            irblock_0 = irblock
            break
    assert (irblock_0 is not None)
    flow_graph = DiGraph()
    flow_graph.node2str = lambda n: node2str(flow_graph, n)

    irb_in_nodes = {}
    irb_out_nodes = {}
    for label in ir_arch.blocks:
        irb_in_nodes[label] = {}
        irb_out_nodes[label] = {}

    for label, irblock in ir_arch.blocks.iteritems():
        block_flow_cb(ir_arch, flow_graph, irblock, irb_in_nodes[label],
                      irb_out_nodes[label])

    for label in ir_arch.blocks:
        print label
        print 'IN', [str(x) for x in irb_in_nodes[label]]
        print 'OUT', [str(x) for x in irb_out_nodes[label]]

    print '*' * 20, 'interblock', '*' * 20
    inter_block_flow(ir_arch, flow_graph, irblock_0.loc_key, irb_in_nodes,
                     irb_out_nodes)

    # from graph_qt import graph_qt
    # graph_qt(flow_graph)
    open('data.dot', 'w').write(flow_graph.dot())
예제 #14
0
def unflatGraph(flat_graph):
    """Rebuild a DiGraph from @flat_graph, a (nodes, edges) pair in which
    every edge is itself a (source, destination) pair."""
    vertices, links = flat_graph
    rebuilt = DiGraph()
    for vertex in vertices:
        rebuilt.add_node(vertex)
    for src, dst in links:
        rebuilt.add_edge(src, dst)
    return rebuilt
예제 #15
0
 def as_graph(self):
     """Return the dependencies (links and pending ones) as a DiGraph"""
     result = DiGraph()
     for head, tail in self.links:
         if not tail:
             # No second node: the first one is added alone
             result.add_node(head)
             continue
         result.add_edge(head, tail)
     for origin, targets in self.pending.iteritems():
         for target in targets:
             result.add_edge(origin, target)
     return result
예제 #16
0
def blist2graph(ab):
    """
    Build the graph of a list of asmbloc
    ab: list of asmbloc
    return: DiGraph of the bloc labels, carrying a lbl2bloc dictionary
            (label -> asmbloc)
    """
    graph = DiGraph()
    graph.lbl2bloc = {}
    for bloc in ab:
        graph.lbl2bloc[bloc.label] = bloc
        graph.add_node(bloc.label)
        # One edge per destination constraint of the bloc
        for constraint in bloc.bto:
            graph.add_edge(bloc.label, constraint.label)
    return graph
예제 #17
0
파일: analysis.py 프로젝트: CaineQT/miasm
 def gen_graph(self, link_all=True):
     """
     Generate the irbloc digraph and store it in self.g
     @link_all: if True, edges to labels absent from self.blocs are
     generated too
     """
     self.g = DiGraph()
     for src_label, irbloc in self.blocs.items():
         self.g.add_node(src_label)
         destinations = self.dst_trackback(irbloc)
         for dest in destinations:
             if isinstance(dest, ExprInt):
                 # Integer destination: replaced by an ExprId built from the
                 # symbol fetched (or created) for this offset
                 symbol = self.symbol_pool.getby_offset_create(int(dest.arg))
                 dest = ExprId(symbol)
             if self.ExprIsLabel(dest) and (dest.name in self.blocs
                                            or link_all is True):
                 self.g.add_edge(src_label, dest.name)
예제 #18
0
파일: asmbloc.py 프로젝트: avelik/miasm
def blist2graph(ab):
    """
    Convert a list of asmbloc into a graph of labels
    ab: list of asmbloc
    return: DiGraph whose lbl2bloc attribute maps each label to its asmbloc
    """
    label_graph = DiGraph()
    blocs_by_label = {}
    label_graph.lbl2bloc = blocs_by_label
    for current in ab:
        blocs_by_label[current.label] = current
        label_graph.add_node(current.label)
        for dst in current.bto:
            label_graph.add_edge(current.label, dst.label)
    return label_graph
예제 #19
0
파일: trace.py 프로젝트: cea-sec/Sibyl
    def __init__(self, abicls, machine):
        """Initialise an empty state.

        @abicls: stored as is in self.abicls
        @machine: name handed to Machine() to obtain the IR analysis instance
        """
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        # sizeof_pointer() is expressed in bits; explicit floor division keeps
        # the byte count an integer whatever the division semantics in use
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name
예제 #20
0
    def as_graph(self, starting_nodes):
        """Return the DiGraph obtained by merging the dependency graph of
        each node of @starting_nodes (used as leafs)
        @starting_nodes: set of DependencyNode instance
        """

        # First pass: compute one subgraph per starting node
        per_node_graphs = []
        for leaf in starting_nodes:
            per_node_graphs.append(self._build_depgraph(leaf))

        # Second pass: copy their nodes and edges into the final DiGraph
        final_graph = DiGraph()
        for subgraph in per_node_graphs:
            for vertex in subgraph.nodes():
                final_graph.add_node(vertex)
            for src_dst in subgraph.edges():
                final_graph.add_uniq_edge(*src_dst)
        return final_graph
예제 #21
0
class ira:

    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        dst = b.dst
        todo = set([dst])
        out = set()
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            for o in out:
                if not o in found:
                    follow.add(o)
            todo = follow
        out = self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all = False):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    d = ExprId(
                        self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
    """
        all_lbls = {}
        for lbl in self.g.nodes():
            if not lbl in self.blocs:
                continue
            b = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in b.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[id(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            out += '%s -> %s;\n' % (id(a), id(b))
        out += '}'
        return out

    def remove_dead(self, b):
        for ir, _, c_out in zip(b.irs, b.c_in, b.c_out):
            j = 0
            while j < len(ir):
                i_cur = ir[j]
                if not isinstance(i_cur.dst, ExprId):
                    pass
                elif (isinstance(i_cur.src, ExprOp) and
                    i_cur.src.op.startswith('call')):
                    # /!\ never remove ir calls
                    pass
                elif not i_cur.dst in c_out:
                    del(ir[j])
                    continue
                j += 1

    def remove_blocs_dead(self):
        for b in self.blocs.values():
            self.remove_dead(b)

    # for test XXX TODO
    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    def compute_in_out(self, b):
        # get out/in from bloc sons
        modified = False
        # set b in
        if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))):
            modified = True
        b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1])))

        # set b out
        c_out = set()
        has_son = False
        for n_son in self.g.successors(b.label):
            # print n_me, n_son
            has_son = True
            if not n_son in self.blocs:
                print "leaf has lost her sons!"
                continue
            b_son = self.blocs[n_son]
            c_out.update(b_son.c_in[0])
        if not has_son:
            # special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(b)
        if b.c_out[-1] != set(c_out):
            modified = True
        b.c_out[-1] = set(c_out)

        # get out/in for bloc
        for i in reversed(xrange(len(b.irs))):
            if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))):
                modified = True
            b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i])))
            if b.c_out[i] != set(b.c_in[i + 1]):
                modified = True
            b.c_out[i] = set(b.c_in[i + 1])
        return modified

    def test_in_out_fix(self):
        fixed = True
        for n in self.g.nodes():
            if not n in self.blocs:
                # leaf has lost her son
                continue
            b = self.blocs[n]
            if b.c_in != b.l_in or b.c_out != b.l_out:
                fixed = False
            b.l_in = [set(x) for x in b.c_in]
            b.l_out = [set(x) for x in b.c_out]
        return fixed

    def compute_dead(self):
        self.get_rw()

        it = 0
        fixed_point = False
        print 'iteration...',
        while not fixed_point:
            print it,
            it += 1
            for n in self.g.nodes():
                if not n in self.blocs:
                    # leaf has lost her son
                    continue
                b = self.blocs[n]
                self.compute_in_out(b)

            fixed_point = self.test_in_out_fix()
        print

    def dead_simp(self):
        self.compute_dead()
        self.remove_blocs_dead()
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            for eq in eqs:
                eq
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
예제 #22
0
class ira:
    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        dst = b.dst
        todo = set([dst])
        out = set()
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            for o in out:
                if o not in found:
                    follow.add(o)
            todo = follow
        out = self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    d = ExprId(self.symbol_pool.getby_offset_create(int(
                        d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += '%s -> %s;\n' % (hash(a), hash(b))
        out += '}'
        return out

    def remove_dead(self, irb):
        """Remove dead affectations using previous liveness analysis
        @irb: irbloc instance
        Return True iff the bloc state has changed
        PRE: compute_in_out(@irb)
        """

        # print 'state1'
        # self.dump_bloc_state(irb)

        modified = False
        for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out):
            j = 0
            while j < len(ir):
                i_cur = ir[j]
                if not isinstance(i_cur.dst, ExprId):
                    pass
                elif i_cur.dst == self.IRDst:
                    # never delete irdst
                    pass
                elif (isinstance(i_cur.src, ExprOp)
                      and i_cur.src.op.startswith('call')):
                    # /!\ never remove ir calls
                    pass
                elif i_cur.dst not in c_out:
                    del (ir[j])
                    modified = True
                    continue
                j += 1

        # print 'state2'
        # self.dump_bloc_state(irb)

        return modified

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc
        Return True iff one of the bloc state has changed
        """
        modified = False
        for b in self.blocs.values():
            modified |= self.remove_dead(b)
        return modified

    # for test XXX TODO
    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    def dump_bloc_state(self, irb):
        print '*' * 80
        for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in,
                                                  irb.c_out)):
            print 'ir'
            for x in ir:
                print '\t', x
            print 'R', [str(x) for x in irb.r[i]]  #c_in]
            print 'W', [str(x) for x in irb.w[i]]  #c_out]
            print 'IN', [str(x) for x in c_in]
            print 'OUT', [str(x) for x in c_out]

    def compute_in_out(self, irb):
        """Liveness computation for a single bloc
        @irb: irbloc instance
        Return True iff bloc state has changed
        """
        modified = False

        # Compute OUT for last irb entry
        c_out = set()
        has_son = False
        for n_son in self.g.successors(irb.label):
            has_son = True
            if n_son not in self.blocs:
                # If the son is not defined, we will propagate our current out
                # nodes to the in nodes's son
                son_c_in = irb.c_out_missing
            else:
                son_c_in = self.blocs[n_son].c_in[0]
            c_out.update(son_c_in)
        if not has_son:
            # Special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(irb)

        if irb.c_out[-1] != c_out:
            irb.c_out[-1] = c_out
            modified = True

        # Compute out/in intra bloc
        for i in reversed(xrange(len(irb.irs))):
            new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
            if irb.c_in[i] != new_in:
                irb.c_in[i] = new_in
                modified = True

            if i >= len(irb.irs) - 1:
                # Last out has been previously updated
                continue
            new_out = set(irb.c_in[i + 1])
            if irb.c_out[i] != new_out:
                irb.c_out[i] = new_out
                modified = True

        return modified

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis"""

        fixed = True
        for node in self.g.nodes():
            if node not in self.blocs:
                # leaf has lost her son
                continue
            irb = self.blocs[node]
            if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
                fixed = False
            irb.l_in = [set(x) for x in irb.c_in]
            irb.l_out = [set(x) for x in irb.c_out]
        return fixed

    def fill_missing_son_c_in(self):
        """Find nodes with missing sons in graph, and add virtual link to all
        written variables of all parents.
        PRE: gen_graph() and get_rw()"""

        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            self.blocs[node].c_out_missing = set()
            has_all_son = True
            for node_son in self.g.successors(node):
                if node_son not in self.blocs:
                    has_all_son = False
                    break
            if has_all_son:
                continue
            parents = self.g.reachable_parents(node)
            for parent in parents:
                irb = self.blocs[parent]
                for var_w in irb.w:
                    self.blocs[node].c_out_missing.update(var_w)

    def compute_dead(self):
        """Iterate liveness analysis until a fixed point is reached.
        PRE: gen_graph()
        """

        it = 0
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            log.debug(it)
            it += 1
            for n in self.g.nodes():
                if n not in self.blocs:
                    # leaf has lost her son
                    continue
                irb = self.blocs[n]
                self.compute_in_out(irb)

            fixed_point = self.test_in_out_fix()

    def dead_simp(self):
        """This function is used to analyse relation of a * complete function *
        This mean the blocs under study represent a solid full function graph.

        Ref: CS 5470 Compiler Techniques and Principles (Liveness
        analysis/Dataflow equations)

        PRE: call to gen_graph
        """

        modified = True
        while modified:
            log.debug('dead_simp step')

            # Update r/w variables for all irblocs
            self.get_rw()
            # Fill c_in for missing sons
            self.fill_missing_son_c_in()

            # Liveness step
            self.compute_dead()
            modified = self.remove_blocs_dead()

        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
예제 #23
0
class Snapshot(object):
    """Record of one execution trace.

    A snapshot stores the register values handed to it as inputs and
    outputs, the bytes read and written in memory, the graph of executed
    addresses and, for each call destination, the list of calls made to it.
    """

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value >> (8 * byte)) & 0xFF)

    @classmethod
    def unpack_ptr(cls, value):
        '''Return the integer stored in @value, a packed native pointer'''
        return struct.unpack('@P', value)[0]

    def __init__(self, abicls, machine):
        '''
        @abicls: stored as-is in self.abicls
        @machine: argument given to Machine() to obtain the IR analyser
        '''
        self.abicls = abicls

        # Register name -> register value
        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        # Stack of calls for which no ret has been seen yet
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        # Edges between consecutively executed addresses
        self.paths = DiGraph()

        # Address -> MemoryAccess
        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        # Floor division: a size in bytes has to stay an integer, even when
        # `/` is the true division
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name

    def add_input_register(self, reg_name, reg_value):
        '''Record @reg_value as the input value of register @reg_name'''
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        '''Record @reg_value as the output value of register @reg_name'''
        self.output_reg[reg_name] = reg_value

    def _add_memory_access(self, address, size, value, access, keep_value):
        '''
        Record, byte by byte, an access of @size bytes of @value at @address
        @access: right OR-ed into the input memory entry of each byte
        @keep_value: if True, a byte seen for the first time enters the input
        memory with its value from @value; otherwise with a null byte
        '''
        for i in xrange(size):
            cur_addr = address + i
            byte = Snapshot.get_byte(value, i)
            self.out_memory[cur_addr] = MemoryAccess(
                1,
                byte,
                0,  # Output access never used
            )

            if cur_addr in self.in_memory:
                self.in_memory[cur_addr].access |= access
            else:
                self.in_memory[cur_addr] = MemoryAccess(
                    1,
                    byte if keep_value else "\x00",
                    access,
                )

    def add_memory_read(self, address, size, value):
        '''Record that the @size bytes of @value have been read at @address'''
        self._add_memory_access(address, size, value, PAGE_READ, True)

    def add_memory_write(self, address, size, value):
        '''Record that the @size bytes of @value have been written at
        @address'''
        # The input value of a written byte is not used by the test
        self._add_memory_access(address, size, value, PAGE_WRITE, False)

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
        self._instr_count += 1

        # Resolve call destination: the instruction executed right after the
        # caller is the entry point of the callee
        if (self._pending_call and self._previous_addr
                == self._pending_call[-1]["caller_addr"]):
            info = self._pending_call[-1]
            info["dest"] = address
            info["beg"] = self._instr_count

    def add_call(self, caller_addr, stack_ptr):
        '''
        Function call, target is not determined yet
        called *before* instruction execution
        '''
        info = {
            "stack_ptr": stack_ptr,
            "caller_addr": caller_addr,
        }
        self._pending_call.append(info)

    def add_ret(self, ret_addr, stack_ptr, value):
        '''
        Function ret
        called *after* instruction execution
        @value is stored as the "ret" field of the matching call
        '''
        # Find corresponding call
        assert self._pending_call
        assert self._pending_call[-1]["stack_ptr"] >= stack_ptr

        info = self._pending_call.pop()
        info["end"] = self._instr_count
        info["ret"] = value
        self.function_calls.setdefault(info["dest"], []).append(info)

    def clean(self):
        """Clean the snapshot for further uses"""

        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of non-overlapping block
        this function agglomerate contiguous blocks having the same access right
        @mem is modified in place
        '''
        for addr in sorted(mem):
            # Skip the blocks already merged into a previous one
            if addr not in mem:
                continue

            cur_mem = mem[addr]
            end_addr = addr + cur_mem.size
            while end_addr in mem:
                next_mem = mem[end_addr]

                # If access change, do not agglomerate
                if cur_mem.access != next_mem.access:
                    break

                cur_mem.size += next_mem.size
                cur_mem.data += next_mem.data
                del mem[end_addr]
                end_addr += next_mem.size
예제 #24
0
파일: depgraph.py 프로젝트: 0xf1sh/miasm
    def _build_depGraph(self, depnode):
        """Recursively build the dependency DiGraph ending at @depnode
        @depnode: starting node

        Return a DiGraph made of @depnode, one edge from each of its direct
        dependencies (as found in self._cache) to it, and every node and edge
        of the graphs built recursively for those dependencies.
        """

        if depnode not in self._cache or \
                not self._cache[depnode]:
            ## There is no dependency
            graph = DiGraph()
            graph.add_node(depnode)
            return graph

        # Recursion
        dependencies = list(self._cache[depnode])

        # head(graphs[i]) == dependencies[i]
        graphs = [self._build_depGraph(sub_depnode)
                  for sub_depnode in dependencies]

        graph = DiGraph()
        graph.add_node(depnode)
        for head in dependencies:
            graph.add_uniq_edge(head, depnode)

        # Merge every sub-graph in the result. The former
        # `itertools.product(graphs)` only wrapped each graph in a 1-tuple,
        # so iterating directly visits the same graphs in the same order.
        for sourcegraph in graphs:
            for node in sourcegraph.nodes():
                graph.add_node(node)
            for edge in sourcegraph.edges():
                graph.add_uniq_edge(*edge)

        return graph
예제 #25
0
파일: analysis.py 프로젝트: vardyh/miasm
class ira:
    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        dst = b.dst
        todo = set([dst])
        out = set()
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            for o in out:
                if not o in found:
                    follow.add(o)
            todo = follow
        out = self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all=False):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    d = ExprId(self.symbol_pool.getby_offset_create(int(
                        d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
    """
        all_lbls = {}
        for lbl in self.g.nodes():
            if not lbl in self.blocs:
                continue
            b = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in b.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[id(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            out += '%s -> %s;\n' % (id(a), id(b))
        out += '}'
        return out

    def remove_dead(self, b):
        for ir, _, c_out in zip(b.irs, b.c_in, b.c_out):
            j = 0
            while j < len(ir):
                i_cur = ir[j]
                if not isinstance(i_cur.dst, ExprId):
                    pass
                elif (isinstance(i_cur.src, ExprOp)
                      and i_cur.src.op.startswith('call')):
                    # /!\ never remove ir calls
                    pass
                elif not i_cur.dst in c_out:
                    del (ir[j])
                    continue
                j += 1

    def remove_blocs_dead(self):
        for b in self.blocs.values():
            self.remove_dead(b)

    # for test XXX TODO
    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    def compute_in_out(self, b):
        # get out/in from bloc sons
        modified = False
        # set b in
        if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))):
            modified = True
        b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1])))

        # set b out
        c_out = set()
        has_son = False
        for n_son in self.g.successors(b.label):
            # print n_me, n_son
            has_son = True
            if not n_son in self.blocs:
                print "leaf has lost her sons!"
                continue
            b_son = self.blocs[n_son]
            c_out.update(b_son.c_in[0])
        if not has_son:
            # special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(b)
        if b.c_out[-1] != set(c_out):
            modified = True
        b.c_out[-1] = set(c_out)

        # get out/in for bloc
        for i in reversed(xrange(len(b.irs))):
            if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))):
                modified = True
            b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i])))
            if b.c_out[i] != set(b.c_in[i + 1]):
                modified = True
            b.c_out[i] = set(b.c_in[i + 1])
        return modified

    def test_in_out_fix(self):
        fixed = True
        for n in self.g.nodes():
            if not n in self.blocs:
                # leaf has lost her son
                continue
            b = self.blocs[n]
            if b.c_in != b.l_in or b.c_out != b.l_out:
                fixed = False
            b.l_in = [set(x) for x in b.c_in]
            b.l_out = [set(x) for x in b.c_out]
        return fixed

    def compute_dead(self):
        self.get_rw()

        it = 0
        fixed_point = False
        print 'iteration...',
        while not fixed_point:
            print it,
            it += 1
            for n in self.g.nodes():
                if not n in self.blocs:
                    # leaf has lost her son
                    continue
                b = self.blocs[n]
                self.compute_in_out(b)

            fixed_point = self.test_in_out_fix()
        print

    def dead_simp(self):
        self.compute_dead()
        self.remove_blocs_dead()
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            for eq in eqs:
                eq
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
예제 #26
0
파일: asmbloc.py 프로젝트: avelik/miasm
 def __init__(self, ab=None):
     """Create the container and register the blocs of @ab
     @ab: (optional) iterable of blocs given to add_blocs; None means no bloc
     """
     # A None default replaces the former `ab=[]`, which was one list object
     # shared by every call
     if ab is None:
         ab = []
     self.blocs = {}
     self.g = DiGraph()
     self.add_blocs(ab)
예제 #27
0
파일: analysis.py 프로젝트: CaineQT/miasm
class ira:
    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        dst = b.dst
        todo = set([dst])
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            for o in out:
                if o not in found:
                    follow.add(o)
            todo = follow

        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    d = ExprId(self.symbol_pool.getby_offset_create(int(d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += "%s -> %s;\n" % (hash(a), hash(b))
        out += "}"
        return out

    def remove_dead_instr(self, irb, useful):
        """Remove dead affectations using previous reaches analysis
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            j = 0
            while j < len(ir):
                cur_instr = ir[j]
                if isinstance(cur_instr.dst, ExprId) and (irb.label, k, cur_instr) not in useful:
                    del ir[j]
                    modified = True
                else:
                    j += 1
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of intial useful instructions
        """

        useful = set()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.g.successors(node)
            has_son = bool(successors)
            for p_son in successors:
                if p_son not in self.blocs:
                    # Leaf has lost its son: don't remove anything
                    # reaching this block
                    for r in self.ira_regs_ids():
                        useful.update(block.cur_reach[-1][r].union(block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.src.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r] if block.defout[-1][r] else block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division,  Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)

        """

        useful = self.init_useful_instr()
        worklist = useful.copy()
        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
                worklist.update(
                    cur_reach[reg].difference(useful).difference(cur_kill[reg] if not instr_defout[reg] else set())
                )
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the reach
        analysis .
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            modified |= self.remove_dead_instr(block, useful)
        return modified

    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print "    (%s, %s, %s)" % p

    def dump_bloc_state(self, irb):
        print "*" * 80
        for k, irs in enumerate(irb.irs):
            for i in xrange(len(irs)):
                print 5 * "-"
                print "instr", k, irs[i]
                print 5 * "-"
                for v in self.ira_regs_ids():
                    if irb.cur_reach[k][v]:
                        print "REACH[%d][%s]" % (k, v)
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print "KILL[%d][%s]" % (k, v)
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print "DEFOUT[%d][%s]" % (k, v)
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block
        @irb: irbloc instance
        PRE: init_reach()
        """

        reach_block = {key: value.copy() for key, value in irb.cur_reach[0].iteritems()}

        # Compute reach from predecessors
        for n_pred in self.g.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(p_block.cur_kill[-1][c_reg])
                )
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in self.ira_regs_ids():
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        for i in xrange(1, len(irb.irs)):
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(irb.cur_kill[i - 1][c_reg])
                )
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(irb.cur_reach[i][c_reg].difference(irb.defout[i][c_reg]))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""

        fixed = True
        for node in self.g.nodes():
            if node in self.blocs:
                irb = self.blocs[node]
                if irb.cur_reach != irb.prev_reach or irb.cur_kill != irb.prev_kill:
                    fixed = False
                    irb.prev_reach = irb.cur_reach[:]
                    irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        fixed_point = False
        log.debug("iteration...")
        while not fixed_point:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            fixed_point = self._test_kill_reach_fix()

    def dead_simp(self):
        """
        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Liveness step
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print "*" * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
예제 #28
0
 def as_graph(self):
     """Return a new DiGraph built from self.links

     Each (node_a, node_b) pair of self.links becomes the edge
     node_b -> node_a.
     """
     dep_graph = DiGraph()
     for link in self.links:
         dst, src = link
         dep_graph.add_edge(src, dst)
     return dep_graph
예제 #29
0
파일: trace.py 프로젝트: cea-sec/Sibyl
class Snapshot(object):
    """Record of one execution trace.

    A snapshot stores the register values handed to it as inputs and
    outputs, the bytes read and written in memory, the graph of executed
    addresses and, for each call destination, the list of calls made to it.
    """

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value >> (8 * byte)) & 0xFF)

    @classmethod
    def unpack_ptr(cls, value):
        '''Return the integer stored in @value, a packed native pointer'''
        return struct.unpack('@P', value)[0]

    def __init__(self, abicls, machine):
        '''
        @abicls: stored as-is in self.abicls
        @machine: argument given to Machine() to obtain the IR analyser
        '''
        self.abicls = abicls

        # Register name -> register value
        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        # Stack of calls for which no ret has been seen yet
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        # Edges between consecutively executed addresses
        self.paths = DiGraph()

        # Address -> MemoryAccess
        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        # Floor division: a size in bytes has to stay an integer, even when
        # `/` is the true division
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name

    def add_input_register(self, reg_name, reg_value):
        '''Record @reg_value as the input value of register @reg_name'''
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        '''Record @reg_value as the output value of register @reg_name'''
        self.output_reg[reg_name] = reg_value

    def _add_memory_access(self, address, size, value, access, keep_value):
        '''
        Record, byte by byte, an access of @size bytes of @value at @address
        @access: right OR-ed into the input memory entry of each byte
        @keep_value: if True, a byte seen for the first time enters the input
        memory with its value from @value; otherwise with a null byte
        '''
        for i in xrange(size):
            cur_addr = address + i
            byte = Snapshot.get_byte(value, i)
            self.out_memory[cur_addr] = MemoryAccess(1,
                                                     byte,
                                                     0,  # Output access never used
            )

            if cur_addr in self.in_memory:
                self.in_memory[cur_addr].access |= access
            else:
                self.in_memory[cur_addr] = MemoryAccess(1,
                                                        byte if keep_value else "\x00",
                                                        access,
                )

    def add_memory_read(self, address, size, value):
        '''Record that the @size bytes of @value have been read at @address'''
        self._add_memory_access(address, size, value, PAGE_READ, True)

    def add_memory_write(self, address, size, value):
        '''Record that the @size bytes of @value have been written at
        @address'''
        # The input value of a written byte is not used by the test
        self._add_memory_access(address, size, value, PAGE_WRITE, False)

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
        self._instr_count += 1

        # Resolve call destination: the instruction executed right after the
        # caller is the entry point of the callee
        if (self._pending_call and
            self._previous_addr == self._pending_call[-1]["caller_addr"]):
            info = self._pending_call[-1]
            info["dest"] = address
            info["beg"] = self._instr_count

    def add_call(self, caller_addr, stack_ptr):
        '''
        Function call, target is not determined yet
        called *before* instruction execution
        '''
        info = {"stack_ptr": stack_ptr,
                "caller_addr": caller_addr,
        }
        self._pending_call.append(info)

    def add_ret(self, ret_addr, stack_ptr, value):
        '''
        Function ret
        called *after* instruction execution
        @value is stored as the "ret" field of the matching call
        '''
        # Find corresponding call
        assert self._pending_call
        assert self._pending_call[-1]["stack_ptr"] >= stack_ptr

        info = self._pending_call.pop()
        info["end"] = self._instr_count
        info["ret"] = value
        self.function_calls.setdefault(info["dest"], []).append(info)

    def clean(self):
        """Clean the snapshot for further uses"""

        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of non-overlapping block
        this function agglomerate contiguous blocks having the same access right
        @mem is modified in place
        '''
        for addr in sorted(mem):
            # Skip the blocks already merged into a previous one
            if addr not in mem:
                continue

            cur_mem = mem[addr]
            end_addr = addr + cur_mem.size
            while end_addr in mem:
                next_mem = mem[end_addr]

                # If access change, do not agglomerate
                if cur_mem.access != next_mem.access:
                    break

                cur_mem.size += next_mem.size
                cur_mem.data += next_mem.data
                del mem[end_addr]
                end_addr += next_mem.size
예제 #30
0
파일: analysis.py 프로젝트: avelik/miasm
class ira:

    def sort_dst(self, todo, done):
        """Empty @todo, dispatching each of its expressions.
        Labels, ExprMem, ExprInt and any expression of another kind are added
        to @done; an ExprCond is replaced in @todo by its src1 and src2; the
        remaining ExprId are returned.
        @todo: set of expressions to sort (consumed)
        @done: set updated with the sorted out expressions
        Return the set of ExprId which are not labels
        """
        pending_ids = set()
        while todo:
            expr = todo.pop()
            # Labels have to be tested before plain identifiers
            if self.ExprIsLabel(expr):
                done.add(expr)
                continue
            if isinstance(expr, (ExprMem, ExprInt)):
                done.add(expr)
                continue
            if isinstance(expr, ExprCond):
                todo.add(expr.src1)
                todo.add(expr.src2)
                continue
            if isinstance(expr, ExprId):
                pending_ids.add(expr)
            else:
                done.add(expr)
        return pending_ids

    def dst_trackback(self, b):
        """Track back the destination of the bloc @b through its assignments.
        Starting from @b.dst, the assignment lists of @b.irs are walked from
        the last one to the first one; each ExprId still to resolve is
        replaced by the source assigned to it, when there is one.
        @b: bloc exposing dst and irs
        Return the set of destinations sorted out by sort_dst
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # @found accumulates over the whole list: ids matched by a
                # previous assignment are already gone from @out, and
                # set.remove() would raise KeyError on them
                out.difference_update(found)

            # Ids not assigned in this list are searched in the previous one
            follow.update(out)
            todo = follow
        # Sort what is left once every assignment list has been visited
        self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all=True):
        """
        Build self.g, the digraph of the irblocs
        @link_all: when True, also gen edges to irblocs which are not present
        in self.blocs
        """
        graph = DiGraph()
        self.g = graph
        for label, irbloc in self.blocs.items():
            graph.add_node(label)
            for dest in self.dst_trackback(irbloc):
                if isinstance(dest, ExprInt):
                    # Integer destination: use the label bound to this offset
                    offset_label = self.symbol_pool.getby_offset_create(
                        int(dest.arg))
                    dest = ExprId(offset_label)
                if not self.ExprIsLabel(dest):
                    continue
                if link_all is True or dest.name in self.blocs:
                    graph.add_edge(label, dest.name)

    def graph(self):
        """Output the graphviz script"""
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += '%s -> %s;\n' % (hash(a), hash(b))
        out += '}'
        return out

    def remove_dead(self, irb):
        """Remove dead affectations using previous liveness analysis
        @irb: irbloc instance
        Return True iff the bloc state has changed
        PRE: compute_in_out(@irb)
        """

        def is_dead(aff, live_out):
            """True when the assignment @aff can be dropped given @live_out"""
            # Only assignments to an identifier are candidates
            if not isinstance(aff.dst, ExprId):
                return False
            # never delete irdst
            if aff.dst == self.IRDst:
                return False
            # /!\ never remove ir calls
            if isinstance(aff.src, ExprOp) and aff.src.op.startswith('call'):
                return False
            return aff.dst not in live_out

        changed = False
        for assignments, _, live_out in zip(irb.irs, irb.c_in, irb.c_out):
            # Walk backward so that a deletion does not shift pending indexes
            for pos in range(len(assignments) - 1, -1, -1):
                if is_dead(assignments[pos], live_out):
                    del assignments[pos]
                    changed = True

        return changed

    def remove_blocs_dead(self):
        """Run remove_dead on every irbloc of self.blocs
        Return True iff the state of at least one bloc has changed
        """
        changed = False
        for irbloc in self.blocs.values():
            # Every bloc is processed, even once a change has been seen
            changed = self.remove_dead(irbloc) or changed
        return changed

    # for test XXX TODO
    def set_dead_regs(self, b):
        """Placeholder: does nothing here, @b is ignored"""
        pass

    def add_unused_regs(self):
        """Placeholder: does nothing here"""
        pass

    def dump_bloc_state(self, irb):
        """Print the state of @irb: for each assignment list, its assignments
        followed by its read (R), written (W), IN and OUT variables"""
        # Single-argument print: the output is the same as the statement form
        print('*' * 80)
        states = zip(irb.irs, irb.c_in, irb.c_out)
        for idx, (assignments, live_in, live_out) in enumerate(states):
            print('ir')
            for aff in assignments:
                print('\t%s' % (aff,))
            print('R %s' % ([str(x) for x in irb.r[idx]],))
            print('W %s' % ([str(x) for x in irb.w[idx]],))
            print('IN %s' % ([str(x) for x in live_in],))
            print('OUT %s' % ([str(x) for x in live_out],))



    def compute_in_out(self, irb):
        """Liveness computation for a single bloc
        @irb: irbloc instance
        Return True iff bloc state has changed
        """
        changed = False

        # OUT of the last entry: union of the IN of each son
        sons = list(self.g.successors(irb.label))
        if sons:
            last_out = set()
            for son in sons:
                if son in self.blocs:
                    last_out.update(self.blocs[son].c_in[0])
                else:
                    # The son is not defined: use the variables gathered in
                    # c_out_missing instead of its IN
                    last_out.update(irb.c_out_missing)
        else:
            # Special case: leaf nodes architecture dependant
            last_out = self.get_out_regs(irb)

        if irb.c_out[-1] != last_out:
            irb.c_out[-1] = last_out
            changed = True

        # Compute out/in intra bloc, from the last entry to the first one
        last = len(irb.irs) - 1
        for idx in range(last, -1, -1):
            # IN is computed from the OUT known before this entry is updated
            still_live = irb.c_out[idx].difference(irb.w[idx])
            entry_in = set(irb.r[idx].union(still_live))
            if irb.c_in[idx] != entry_in:
                irb.c_in[idx] = entry_in
                changed = True

            # Last out has been previously updated
            if idx < last:
                entry_out = set(irb.c_in[idx + 1])
                if irb.c_out[idx] != entry_out:
                    irb.c_out[idx] = entry_out
                    changed = True

        return changed

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis, ie. no bloc has its c_in/c_out differing from the l_in/l_out
        saved by the previous call. The saved copies are refreshed for every
        bloc on the way."""

        stable = True
        for label in self.g.nodes():
            # leaf has lost her son
            if label in self.blocs:
                irb = self.blocs[label]
                if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
                    stable = False
                # Keep copies, so that later updates of c_in/c_out are seen
                irb.l_in = list(map(set, irb.c_in))
                irb.l_out = list(map(set, irb.c_out))
        return stable

    def fill_missing_son_c_in(self):
        """Reset c_out_missing of every bloc of the graph; for the blocs
        having at least one son absent from self.blocs, fill it with all the
        variables written by the blocs returned by reachable_parents.
        PRE: gen_graph() and get_rw()"""

        for label in self.g.nodes():
            if label not in self.blocs:
                continue
            irb = self.blocs[label]
            irb.c_out_missing = set()
            if all(son in self.blocs for son in self.g.successors(label)):
                # No missing son: nothing to add
                continue
            for parent in self.g.reachable_parents(label):
                for written in self.blocs[parent].w:
                    irb.c_out_missing.update(written)

    def compute_dead(self):
        """Run compute_in_out on every bloc of the graph, again and again,
        until test_in_out_fix reports a fixed point.
        PRE: gen_graph()
        """

        iteration = 0
        log.debug('iteration...')
        while True:
            log.debug(iteration)
            iteration += 1
            for label in self.g.nodes():
                # leaf has lost her son
                if label in self.blocs:
                    self.compute_in_out(self.blocs[label])

            if self.test_in_out_fix():
                break

    def dead_simp(self):
        """This function is used to analyse relation of a * complete function *
        This means the blocs under study represent a solid full function graph.

        The liveness analysis and the removal of dead assignments are repeated
        until nothing is removed anymore; expressions are then simplified.

        Ref: CS 5470 Compiler Techniques and Principles (Liveness
        analysis/Dataflow equations)

        PRE: call to gen_graph
        """

        while True:
            log.debug('dead_simp step')

            # Update r/w variables for all irblocs
            self.get_rw()
            # Fill c_in for missing sons
            self.fill_missing_son_c_in()

            # Liveness step
            self.compute_dead()
            if not self.remove_blocs_dead():
                break

        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the assignments of each irbloc of self.blocs by the
        equations resulting from its symbolic execution: one ExprAff per
        symbol whose value differs from its initial one. Each bloc is printed
        on the way."""
        for irb in self.blocs.values():
            # Each register starts with a terminal ExprId standing for itself
            symbols_init = {}
            for reg in self.arch.regs.all_regs_ids:
                initial = ExprId(reg.name, reg.size)
                initial.is_term = True
                symbols_init[reg] = initial
            engine = symbexec(self, dict(symbols_init))
            engine.emulbloc(irb)
            eqs = []
            for dst in engine.symbols:
                value = engine.symbols[dst]
                untouched = dst in symbols_init and symbols_init[dst] == value
                if not untouched:
                    eqs.append(ExprAff(dst, value))
            # Single-argument print: same output as the statement form
            print('*' * 40)
            print(irb)
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        """Return the size of a char in bits.
        Abstract here: always raises NotImplementedError"""
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        """Return the size of a short in bits.
        Abstract here: always raises NotImplementedError"""
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        """Return the size of an int in bits.
        Abstract here: always raises NotImplementedError"""
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        """Return the size of a long in bits.
        Abstract here: always raises NotImplementedError"""
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        """Return the size of a void* in bits.
        Abstract here: always raises NotImplementedError"""
        raise NotImplementedError("Abstract method")
예제 #31
0
class Snapshot(object):

    # Registers filtered out of the recorded output registers by
    # remove_clobbered_registers()
    # NOTE(review): "RBP" is listed twice; the second entry is possibly meant
    # to be another register -- confirm
    clobbered_regs = [
        "RCX", "RDX", "RSI", "RDI", "RBP", "R8", "R9", "R10", "R11", "RBP"
    ]

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))

    @classmethod
    def unpack_ptr(cls, value):
        return struct.unpack('@P', value)[0]

    def __init__(self, segments, abicls, machine):
        """Create an empty snapshot
        @segments: list of segments; a segment is indexed as (start, end)
        when an address is looked up
        @abicls: ABI description; its regs_mapping is used by changeArg
        @machine: name handed to Machine() to get the IR analysis class
        """
        self.segments = segments
        self.abicls = abicls

        # Register name -> value, before and after the execution
        self.input_reg = {}
        self.output_reg = {}

        # Executed addresses, linked in their execution order
        self._previous_addr = 0
        self._current_addr = 0
        self.paths = DiGraph()

        # Address -> memory block, as seen on input and on output
        self.in_memory = {}
        self.out_memory = {}

        # Referenced address -> reference
        self.refs = {}

        self._ira = Machine(machine).ira()
        # Floor division: the size is used as an index and as a range bound,
        # so it has to stay an integer even when "/" is the true division
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name

    def _get_segment_index_by_addr(self, addr):
        for i, seg in enumerate(self.segments):
            if seg[0] <= addr < seg[1]:
                return i
        raise ValueError("Segment not found for addr %x" % addr)

    def add_input_register(self, reg_name, reg_value):
        """Record @reg_value as the input value of the register @reg_name"""
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        """Record @reg_value as the output value of the register @reg_name"""
        self.output_reg[reg_name] = reg_value

    def add_memory_read(self, address, size, value):
        """Record a memory read of @size bytes at @address, returning @value.
        Each byte is stored as a one byte block: it overwrites the output
        memory, and it is either added to the input memory with the read
        right, or only gets this right when the byte is already known."""
        for index in range(size):
            byte_addr = address + index
            byte = Snapshot.get_byte(value, index)
            segment = self._get_segment_index_by_addr(byte_addr)

            # Output access never used
            self.out_memory[byte_addr] = MemoryAccess(1, byte, 0, segment)

            if byte_addr in self.in_memory:
                self.in_memory[byte_addr].access |= PAGE_READ
            else:
                self.in_memory[byte_addr] = MemoryAccess(
                    1, byte, PAGE_READ, segment)

    def add_memory_write(self, address, size, value):
        """Record a memory write of @value, on @size bytes, at @address.
        Each byte is stored as a one byte block: it overwrites the output
        memory, and it is either added to the input memory as a null byte
        with the write right, or only gets this right when the byte is
        already known."""
        for index in range(size):
            byte_addr = address + index
            segment = self._get_segment_index_by_addr(byte_addr)

            # Output access never used
            self.out_memory[byte_addr] = MemoryAccess(
                1, Snapshot.get_byte(value, index), 0, segment)

            if byte_addr in self.in_memory:
                self.in_memory[byte_addr].access |= PAGE_WRITE
            else:
                # The value is not used by the test
                self.in_memory[byte_addr] = MemoryAccess(
                    1, "\x00", PAGE_WRITE, segment)

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)

    def clean(self):
        '''Try to remove all implementation dependent elements from the
        trace, then normalise what is left (memory blocks, registers,
        references and segments)'''

        # do not record stack frame on output because the use of this
        # memory area is implementation dependent
        self.remove_stack_frame()

        for memory in (self.in_memory, self.out_memory):
            self.agglomerate_memory(memory)

        # The order of the remaining steps matters
        remaining_steps = (
            self.remove_clobbered_registers,
            self.find_references_to_input_memory,
            self.addresses_to_segment_offset,
            self.remap_segment,
        )
        for step in remaining_steps:
            step()

    def remove_stack_frame(self):
        '''
        Remove stack frame from the memory.
        Memory is considered fragmented ie. composed of one bit sized blocks
        '''

        SP = self.input_reg[self.sp]

        stack_seg_idx = self._get_segment_index_by_addr(SP)
        top_stack = self.segments[stack_seg_idx][0]

        for mem in (self.out_memory, self.in_memory):
            for addr in mem.keys():
                if top_stack < addr < SP + self._ptr_size:
                    # addr in stack frame
                    del mem[addr]

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of 1 byte sized bloc,
        this function agglomerate contiguous blocs that are in the same segment and have the same access right
        '''
        for addr in sorted(mem.keys()):

            # if the addr is not already deleted
            if addr in mem:

                end_addr = addr + mem[addr].size
                while end_addr in mem:
                    cur_mem = mem[addr]
                    next_mem = mem[end_addr]

                    # If access change, do not agglomerate
                    if cur_mem.access != next_mem.access:
                        break

                    # If segment change, do not agglomerate
                    if cur_mem.segment != next_mem.segment:
                        break

                    cur_mem.size += next_mem.size
                    cur_mem.data += next_mem.data
                    del mem[end_addr]
                    end_addr += next_mem.size

    def remove_clobbered_registers(self):
        '''Remove clobbered registers from the output memory'''
        self.output_reg = {
            reg: v
            for reg, v in self.output_reg.iteritems()
            if reg not in self.clobbered_regs
        }

    def find_references_to_input_memory(self):
        '''
        Populate the refs attribute
        This attribute should contain all the references (pointers) present in memory and registers
        '''

        # The heuristic used to detect a reference is: if a memory bloc
        # or a register holds a value that is an address used in the
        # input memory, then this bloc or register is a reference

        # To determine the size of the referenced memory, the strategy
        # is a greedy one. All the memory that follows the referenced
        # address is considered as part of the reference.

        # Registers: the stack pointer is skipped here, it is handled by
        # add_rsp_ref_to_stack() below
        for reg, value in self.input_reg.iteritems():
            if reg != self.sp:
                self.update_references(reg, value, "in_reg")
        for reg, value in self.output_reg.iteritems():
            if reg != self.sp:
                self.update_references(reg, value, "out_reg")

        ptr_size = self._ptr_size

        # Memory: every pointer sized window of each bloc is tried as a
        # candidate pointer, at every byte offset
        for addr, mem in self.in_memory.iteritems():
            data = mem.data
            for i in xrange(len(data) - ptr_size + 1):
                self.update_references(addr + i,
                                       self.unpack_ptr(data[i:ptr_size + i]),
                                       "in_mem")

        for addr, mem in self.out_memory.iteritems():
            data = mem.data
            for i in xrange(len(data) - ptr_size + 1):
                self.update_references(addr + i,
                                       self.unpack_ptr(data[i:ptr_size + i]),
                                       "out_mem")

        self.add_rsp_ref_to_stack()

        # If two references are contiguous, then the first reference
        # size will cover the second one. The following code removes
        # these kinds of overlaps
        # NOTE(review): the size of a reference shrinks while the inner loop
        # runs, so the result may depend on the iteration order of self.refs
        # when more than one reference falls in the same range -- confirm
        for ref in sorted(self.refs):
            for ref2 in self.refs:
                if ref < ref2 < ref + self.refs[ref].size:
                    self.refs[ref].size -= self.refs[ref2].size

    def add_rsp_ref_to_stack(self):
        """Register the input and the output values of the stack pointer as
        references named "RSP", creating a pointer sized reference in the
        segment of the input stack pointer when none exists yet"""
        in_RSP = self.input_reg[self.sp]
        out_RSP = self.output_reg[self.sp]

        stack_seg_idx = self._get_segment_index_by_addr(in_RSP)

        # NOTE(review): the name is the literal "RSP" while the register is
        # looked up through self.sp -- confirm both always match
        for stack_ptr, kind in ((in_RSP, "in_reg"), (out_RSP, "out_reg")):
            if stack_ptr not in self.refs:
                self.refs[stack_ptr] = Reference(stack_seg_idx,
                                                 self._ptr_size)
            self.refs[stack_ptr].add_ref("RSP", kind)

    def update_references(self, name, value, value_type):
        """Record @name as a reference to @value when @value is an address
        lying in a bloc of the input memory
        @name: register name or memory address holding @value
        @value: candidate pointer
        @value_type: kind of holder, handed to Reference.add_ref
        """
        in_mem = self.in_memory
        for addr, mem in in_mem.items():
            if not (addr <= value < addr + mem.size):
                continue

            if value not in self.refs:
                # Greedy size: what is left of the bloc after @value ...
                size = mem.size - (value - addr)
                # ... extended with the blocs found right behind
                # NOTE(review): the probed address is addr + size, which is
                # the end of the bloc only when value == addr -- confirm
                while addr + size in in_mem:
                    size += in_mem[addr + size].size

                self.refs[value] = Reference(mem.segment, size)

            self.refs[value].add_ref(name, value_type)

    def addresses_to_segment_offset(self):
        '''Convert the absolute addresses to segment base/offset addresses,
        in the keys of the input memory, of the output memory and of the
        references, then inside each reference'''

        for table in (self.in_memory, self.out_memory, self.refs):
            self._addresses_to_segment_offset(table)

        for reference in self.refs.values():
            reference.addresses_to_segment_offset(self.segments)

    def _addresses_to_segment_offset(self, mem):
        for addr in mem.keys():
            seg_idx = mem[addr].segment
            segment_base = self.segments[seg_idx][0]
            mem[(addr - segment_base, seg_idx)] = mem.pop(addr)

    def isRegInInputRef(self, reg):
        for addr, ref in self.refs.iteritems():
            if reg in ref.in_reg:
                return addr
        return None

    def isRegInOutputRef(self, reg):
        for addr, ref in self.refs.iteritems():
            if reg in ref.out_reg:
                return addr
        return None

    def isMemInRef(self, mem):
        for addr, ref in self.refs.iteritems():
            if mem in ref.in_mem:
                return addr
        return None

    def _updateSegmentInDict(self, dic, seg_mapping):
        '''
        Change the segment indexes used by dic according to the new segment mapping seg_mapping
        '''
        for (offset, seg) in dic.keys():
            new_seg_nb = seg_mapping[seg]
            dic[(offset, seg)].segment = new_seg_nb
            dic[(offset, new_seg_nb)] = dic.pop((offset, seg))

    def __update_mapping_struct(self, memory, seg_mapping, seg_borne):
        '''
        Add the segments used in mem to the segment mapping (seg_mapping) and update the bornes (seg_borne)
        Instance variable "__nb_seg" should be initialized to 0 before the fisrt call to this function
        '''

        for (offset, seg), mem in memory.iteritems():
            if seg in seg_mapping:
                (minAddr, maxAddr) = seg_borne[seg]
                seg_borne[seg] = (min(minAddr,
                                      offset), max(maxAddr, offset + mem.size))
            else:
                seg_borne[seg] = (offset, offset + mem.size)
                seg_mapping[seg] = self.__nb_seg
                self.__nb_seg += 1
        return self.__nb_seg

    def remap_segment(self):
        '''
        Reduce self.segments to be minimalist: only the segments used by the
        input memory, the output memory and the references are kept, and the
        segment indexes are renumbered accordingly everywhere
        '''
        seg_mapping = {}
        seg_borne = {}
        tables = (self.in_memory, self.out_memory, self.refs)

        # Get the new segment mapping
        self.__nb_seg = 0
        for table in tables:
            self.__update_mapping_struct(table, seg_mapping, seg_borne)

        # Use the new mapping in snapshot's dictionaries
        for table in tables:
            self._updateSegmentInDict(table, seg_mapping)

        # Use the new mapping inside reference structure
        for ref in self.refs.values():
            ref.in_mem = [(offset, seg_mapping[seg])
                          for (offset, seg) in ref.in_mem]
            ref.out_mem = [(offset, seg_mapping[seg])
                           for (offset, seg) in ref.out_mem]

        # Update self.segments according to the new mapping
        new_segments = [None] * len(seg_mapping)
        for seg, mapping in seg_mapping.items():
            new_segments[mapping] = self.segments[seg]
        self.segments = new_segments

    def removeRegFromRef(self, reg):
        for ref in self.refs.itervalues():
            if reg in ref.in_reg:
                ref.in_reg.remove(reg)

    def removeMemFromRef(self, mem):
        for ref in self.refs.itervalues():
            if mem in ref.in_mem:
                ref.in_mem.remove(mem)

    def getStackSegment(self):
        for addr, ref in self.refs.iteritems():
            if self.sp in ref.in_reg:
                return addr

    # True: arg changed
    # False: arg not present
    def changeArg(self, number, newValue):
        """Replace the input value of the argument number @number
        @number: index of the argument, starting at 1. Arguments below 7 are
        taken in self.abicls.regs_mapping, the next ones on the stack
        @newValue: new value of the argument
        Return True when the argument has been changed, False when it is not
        present in the snapshot (always True for a register argument)
        """

        # If argument is passed in a register
        if number < 7:
            reg = self.abicls.regs_mapping[number - 1]

            self.removeRegFromRef(reg)

            self.input_reg[reg] = newValue
            # The register may be missing from the recorded output
            self.output_reg.pop(reg, None)

            return True

        # If argument is passed on the stack
        (stackOff, stackSeg) = self.getStackSegment()

        # NOTE(review): stack slots are assumed to be 8 bytes wide here,
        # whatever self._ptr_size is -- confirm
        argAddr = stackOff + 8 * (number - 7) + 8
        argEnd = argAddr + self._ptr_size

        self.removeMemFromRef((argAddr, stackSeg))

        argFoundInSnapshot = False

        # Search for the arg address in the input memory
        # If it is found, change its value
        for (offset, seg), mem in self.in_memory.items():
            if seg != stackSeg:
                continue
            if offset <= argAddr < offset + mem.size:
                argFoundInSnapshot = True
                rel = argAddr - offset
                mem.data = (mem.data[:rel] + struct.pack('@P', newValue) +
                            mem.data[rel + self._ptr_size:])
                mem.size = len(mem.data)

        if not argFoundInSnapshot:
            return False

        # The argument is found: cut it out of the output memory, keeping
        # what surrounds it in the bloc holding it
        memO = self.out_memory
        # Iterate on a copy of the keys: blocs are added and deleted
        for addr in list(memO):
            (offset, seg) = addr
            if seg != stackSeg:
                continue
            bloc = memO[addr]
            blocEnd = offset + bloc.size
            if not (offset <= argAddr < blocEnd):
                continue
            data = bloc.data

            if argEnd < blocEnd:
                # Bytes behind the argument go in a bloc of their own; its
                # data is what follows the argument, matching its size
                memO[(argEnd, stackSeg)] = MemoryAccess(
                    blocEnd - argEnd, data[argEnd - offset:], bloc.access,
                    stackSeg)

            if argAddr > offset:
                # Bytes before the argument stay in the bloc: size and data
                # are shortened together
                bloc.size = argAddr - offset
                bloc.data = data[:argAddr - offset]
            else:
                del memO[addr]

        return True
예제 #32
0
    def _build_depgraph(self, depnode):
        """Recursively build the DiGraph of the dependencies of @depnode
        @depnode: starting node
        Return a DiGraph made of @depnode, of an edge from each of its
        dependencies (as found in self._cache) to it, and of the graphs built
        for these dependencies
        """
        graph = DiGraph()
        graph.add_node(depnode)

        if depnode not in self._cache or not self._cache[depnode]:
            # There is no dependency
            return graph

        # Recursion
        dependencies = list(self._cache[depnode])
        subgraphs = [self._build_depgraph(sub_depnode)
                     for sub_depnode in dependencies]

        # head(subgraphs[i]) == dependencies[i]
        for head in dependencies:
            graph.add_uniq_edge(head, depnode)

        # Merge each sub graph in the resulting one
        for subgraph in subgraphs:
            for node in subgraph.nodes():
                graph.add_node(node)
            for edge in subgraph.edges():
                graph.add_uniq_edge(*edge)

        return graph
예제 #33
0
 def __init__(self, ab=None):
     """Create the bloc container and add the blocs of @ab to it
     @ab: optional iterable of blocs, handed to add_blocs
     """
     self.blocs = {}
     self.g = DiGraph()
     # A None default avoids sharing one mutable list between all the calls
     self.add_blocs([] if ab is None else ab)
예제 #34
0
class ira:
    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        """Track the destination of @b backwards through its assignments.

        Starting from b.dst, the lists of b.irs are walked from the last one
        to the first one. At each step sort_dst() classifies the pending
        expressions: the ones it stores in `done` are kept as results, and
        each ExprId it returns is replaced by the source of the instruction
        assigning it in the current list, or carried over unchanged to the
        previous list when no instruction assigns it.

        @b: irbloc instance
        Return the set of expressions gathered in `done`
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # `found` accumulates over the instructions of this list:
                # calling out.remove() for each of its elements on every
                # iteration raised KeyError as soon as an already removed
                # identifier was met again. difference_update() ignores them.
                out.difference_update(found)

            # Identifiers left in `out` are not assigned in this list: keep
            # tracking them in the previous one
            follow.update(out)
            todo = follow

        return done

    def gen_graph(self, link_all=True):
        """
        Build self.g, the digraph of the irblocs: one node per label of
        self.blocs and one edge per destination found by dst_trackback.
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            self.g.add_node(lbl)
            for d in self.dst_trackback(b):
                if isinstance(d, ExprInt):
                    # Turn the integer destination into a label identifier
                    target = self.symbol_pool.getby_offset_create(int(d.arg))
                    d = ExprId(target)
                if not self.ExprIsLabel(d):
                    continue
                if d.name in self.blocs or link_all is True:
                    self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += '%s -> %s;\n' % (hash(a), hash(b))
        out += '}'
        return out

    def remove_dead_instr(self, irb, useful):
        """Delete from @irb every assignment to an ExprId which is not listed
        in @useful
        @irb: irbloc instance, its instruction lists are modified in place
        @useful: useful statements from previous reach analysis, as triplets
                 (block label, instruction list index, instruction)
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        changed = False
        for k, irs in enumerate(irb.irs):
            # Walk backwards so that a deletion never shifts the positions
            # which remain to be visited
            for pos in reversed(range(len(irs))):
                instr = irs[pos]
                if not isinstance(instr.dst, ExprId):
                    continue
                if (irb.label, k, instr) in useful:
                    continue
                del irs[pos]
                changed = True
        return changed

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of intial useful instructions
        """

        useful = set()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.g.successors(node)
            has_son = bool(successors)
            for p_son in successors:
                if p_son not in self.blocs:
                    # Leaf has lost its son: don't remove anything
                    # reaching this block
                    for r in self.ira_regs_ids():
                        useful.update(block.cur_reach[-1][r].union(
                            block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r] if block.
                                  defout[-1][r] else block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division,  Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)

        """

        useful = self.init_useful_instr()
        worklist = useful.copy()
        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
                worklist.update(cur_reach[reg].difference(useful).difference(
                    cur_kill[reg] if not instr_defout[reg] else set()))
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the reach
        analysis .
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            modified |= self.remove_dead_instr(block, useful)
        return modified

    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    @staticmethod
    def print_set(v_set):
        """Print the triplets of a set, one per line
        @v_set: set containing triplets elements
        """
        for triplet in v_set:
            print('    (%s, %s, %s)' % triplet)

    def dump_bloc_state(self, irb):
        print '*' * 80
        for k, irs in enumerate(irb.irs):
            for i in xrange(len(irs)):
                print 5 * "-"
                print 'instr', k, irs[i]
                print 5 * "-"
                for v in self.ira_regs_ids():
                    if irb.cur_reach[k][v]:
                        print 'REACH[%d][%s]' % (k, v)
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print 'KILL[%d][%s]' % (k, v)
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print 'DEFOUT[%d][%s]' % (k, v)
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Update the reach and kill sets of a single block from the state of
        its predecessors in self.g
        @irb: irbloc instance
        PRE: init_reach()
        """

        # Work on a copy, so that it can be compared with the current state
        entry_reach = dict(
            (reg, defs.copy()) for reg, defs in irb.cur_reach[0].iteritems())

        # Compute reach from predecessors
        for pred_label in self.g.predecessors(irb.label):
            pred = self.blocs[pred_label]

            # Handle each register definition
            for reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                surviving = pred.cur_reach[-1][reg].difference(
                    pred.cur_kill[-1][reg])
                entry_reach[reg].update(pred.defout[-1][reg].union(surviving))

        # If a predecessor has changed
        if entry_reach != irb.cur_reach[0]:
            irb.cur_reach[0] = entry_reach
            for reg in self.ira_regs_ids():
                defined = irb.defout[0][reg]
                if defined:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][reg].update(
                        entry_reach[reg].difference(defined))

        # Compute reach and kill for block's instructions: each index is fed
        # by the index just before it
        for cur in xrange(1, len(irb.irs)):
            prev = cur - 1
            for reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                surviving = irb.cur_reach[prev][reg].difference(
                    irb.cur_kill[prev][reg])
                irb.cur_reach[cur][reg].update(
                    irb.defout[prev][reg].union(surviving))
                defined = irb.defout[cur][reg]
                if defined:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[cur][reg].update(
                        irb.cur_reach[cur][reg].difference(defined))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""

        fixed = True
        for node in self.g.nodes():
            if node in self.blocs:
                irb = self.blocs[node]
                if (irb.cur_reach != irb.prev_reach
                        or irb.cur_kill != irb.prev_kill):
                    fixed = False
                    irb.prev_reach = irb.cur_reach[:]
                    irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Run compute_reach_block on every block of the graph, again and again,
        until _test_kill_reach_fix reports a fixed point.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        log.debug('iteration...')
        while True:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            # At least one full pass is always done before testing
            if self._test_kill_reach_fix():
                break

    def dead_simp(self):
        """
        Remove the dead code of the blocks, then simplify their expressions.

        The analysis expects the blocks under study to form the whole graph of
        a * complete function *.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        tracked_regs = self.ira_regs_ids()
        self.get_rw(tracked_regs)
        # Reach analysis, then removal of what it leaves unmarked
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        """Replace the content of each block of self.blocs by a single list of
        ExprAff, built from the symbols held by a symbexec instance after its
        emulbloc() call on the block.

        Symbols still equal to their initial register value are skipped. Each
        block is printed, and its `lines` attribute is reset to [None].
        """
        for irb in self.blocs.values():
            # Initial state: each register is bound to a fresh terminal ExprId
            initial_state = {}
            for reg in self.arch.regs.all_regs_ids:
                term = ExprId(reg.name, reg.size)
                term.is_term = True
                initial_state[reg] = term

            engine = symbexec(self, dict(initial_state))
            engine.emulbloc(irb)

            equations = []
            for dst in engine.symbols:
                value = engine.symbols[dst]
                untouched = (dst in initial_state
                             and initial_state[dst] == value)
                if not untouched:
                    equations.append(ExprAff(dst, value))

            print('*' * 40)
            print(irb)
            irb.irs = [equations]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")