Beispiel #1
0
    def get_graph(self, addr):
        """Build the control-flow graph reachable from `addr`.

        The instruction at `addr` is disassembled first; jump targets and
        fall-through addresses are then pushed on a work list and visited
        until the list is empty. For PE binaries
        `pe_reverse_stripped_symbols` is called once the graph is built.

        Returns the populated Graph object.
        """
        from capstone import CS_OP_IMM
        ARCH_UTILS = self.load_arch_module().utils

        curr = self.lazy_disasm(addr)
        gph = Graph(self, addr)
        rest = []  # addresses still to visit, used as a stack

        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        while True:
            if not gph.exists(curr):
                if ARCH_UTILS.is_uncond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[0].type == CS_OP_IMM:
                        target = curr.operands[0].value.imm
                        nxt = self.lazy_disasm(target)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr)
                    gph.uncond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_cond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[0].type == CS_OP_IMM:
                        nxt_jump = self.lazy_disasm(curr.operands[0].value.imm)
                        direct_nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_cond_next(curr, nxt_jump, direct_nxt)
                        rest.append(nxt_jump.address)
                        rest.append(direct_nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr)
                    gph.cond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_ret(curr):
                    gph.add_node(curr)

                else:
                    try:
                        nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    except Exception:
                        # Best effort: keep the instruction as a node with no
                        # successor. KeyboardInterrupt/SystemExit are no
                        # longer swallowed here.
                        gph.add_node(curr)

            try:
                curr = self.lazy_disasm(rest.pop())
            except IndexError:
                # Work list exhausted: the whole graph has been explored.
                break

        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        elapsed = time.perf_counter() - start
        debug__("Graph built in %fs" % elapsed)

        return gph
Beispiel #2
0
def generate_ast(ctx__, paths):
    """Build the AST for `paths` and run the registered processing passes.

    `ctx__` is stored in the module-level global `ctx`. The AST is created
    by `get_ast_branch`, then every callable in `ctx.libarch.registered`
    is invoked with `(ctx, ast)`. Colors are assigned when `ctx.color` is
    set. The duration of both phases is logged through `debug__`.

    Returns the processed AST.
    """
    global ctx
    ctx = ctx__

    # time.clock() was removed in Python 3.8; perf_counter replaces it.
    start = time.perf_counter()
    ast = get_ast_branch(ctx, paths)
    elapsed = time.perf_counter() - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast
    start = time.perf_counter()
    for func in ctx.libarch.registered:
        func(ctx, ast)
    elapsed = time.perf_counter() - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
Beispiel #3
0
    def __init__(self, mem, filename, raw_type=None, raw_base=None, raw_big_endian=None):
        """Open `filename` and instantiate the matching file-format loader.

        When `raw_type` is given, the raw loader is used (with `raw_base`
        and `raw_big_endian` forwarded to it) and no format detection is
        done. Otherwise `load_magic` is called and an ELF or PE loader is
        created according to `self.type`; `mem` is forwarded to that loader.

        Raises ExcFileFormat if `self.type` is neither T_BIN_ELF nor
        T_BIN_PE after `load_magic`.
        """
        self.__binary = None
        self.reverse_symbols = {} # ad -> name
        self.symbols = {} # name -> ad
        self.section_names = {}
        self.type = None

        self._abs_sections = {} # start section -> SectionAbs
        self._sorted_sections = [] # bisect list, contains section start address

        # Identity test: None is a singleton and must not go through __ne__.
        if raw_type is not None:
            import lib.fileformat.raw as LIB_RAW
            self.__binary = LIB_RAW.Raw(self, filename, raw_type,
                                        raw_base, raw_big_endian)
            self.type = T_BIN_RAW
            return

        start = time()
        self.load_magic(filename)

        if self.type == T_BIN_ELF:
            import lib.fileformat.elf as LIB_ELF
            self.__binary = LIB_ELF.ELF(mem, self, filename)
        elif self.type == T_BIN_PE:
            import lib.fileformat.pe as LIB_PE
            self.__binary = LIB_PE.PE(mem, self, filename)
        else:
            raise ExcFileFormat()

        elapsed = time() - start
        debug__("Binary loaded in %fs" % elapsed)
Beispiel #4
0
    def __loop_detection(self, ctx, entry):
        """Detect the loops reachable from `entry` and fill the loop tables.

        Explores the graph, searches equivalent and false loops, removes
        the false loops from `self.loops_all`, then fills
        `self.infinite_loop`, `self.loops_start` and `self.last_loop_node`.
        The number of loops and the elapsed time are logged with `debug__`.

        `ctx` is not used by this method.
        """
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        self.__explore(entry, set(), set(), {}, None)

        self.__search_equiv_loops()
        self.__search_false_loops()

        for k in self.false_loops:
            del self.loops_all[k]

        # Search infinite loops
        self.infinite_loop = set()
        for l_curr_loop, l_set in self.loops_all.items():
            if self.__is_inf_loop(l_set):
                self.infinite_loop.add(l_curr_loop)

        # Save first address of loops. The loop variable must not be named
        # `start`: that would overwrite the timer and corrupt the elapsed
        # time reported below.
        for _, l_start in self.loops_all:
            self.loops_start.add(l_start)

        # Search the last nodes which force looping
        for (l_prev_loop, l_start), l_set in self.loops_all.items():
            self.last_loop_node[(l_prev_loop, l_start)] = set()
            self.__search_last_loop_node(set(), l_prev_loop, l_start, l_set)

        elapsed = time.perf_counter() - start
        debug__("Exploration: found %d loop in %fs" %
                (len(self.loops_all), elapsed))
Beispiel #5
0
    def __init__(self,
                 filename,
                 raw_type=None,
                 raw_base=None,
                 raw_big_endian=None):
        """Open `filename` and instantiate the matching file-format loader.

        When `raw_type` is given, the raw loader is used (with `raw_base`
        and `raw_big_endian` forwarded to it) and no format detection is
        done. Otherwise `load_magic` is called and an ELF or PE loader is
        created according to `self.type`.

        Raises ExcFileFormat if `self.type` is neither T_BIN_ELF nor
        T_BIN_PE after `load_magic`.
        """
        self.__binary = None
        self.reverse_symbols = {}
        self.symbols = {}
        self.type = None

        # Identity test: None is a singleton and must not go through __ne__.
        if raw_type is not None:
            import lib.fileformat.raw as LIB_RAW
            self.__binary = LIB_RAW.Raw(filename, raw_type, raw_base,
                                        raw_big_endian)
            self.type = T_BIN_RAW
            return

        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()
        self.load_magic(filename)

        if self.type == T_BIN_ELF:
            import lib.fileformat.elf as LIB_ELF
            self.__binary = LIB_ELF.ELF(self, filename)
        elif self.type == T_BIN_PE:
            import lib.fileformat.pe as LIB_PE
            self.__binary = LIB_PE.PE(self, filename)
        else:
            raise ExcFileFormat()

        elapsed = time.perf_counter() - start
        debug__("Binary loaded in %fs" % elapsed)
Beispiel #6
0
    def loop_detection(self, entry, bypass_false_search=False):
        """Run the loop analysis of the graph starting at `entry`.

        All loop tables are reset first. When the graph has more than
        MAX_NODES nodes, `self.skipped_loops_analysis` is set and the method
        returns immediately. With `bypass_false_search` set, the false-loop
        and same-depth equivalent-loop searches are skipped.
        """
        t_begin = time()

        # Equivalent loops at a same deep in the loops dependencies tree
        self.deep_equiv = set()
        # For one loop : contains all address of the loop only
        self.loops_set = {}
        # For one loop : contains all address of the loop and sub-loops
        self.loops_all = {}
        # Loop dependencies
        self.deps = {}
        self.rev_deps = {}
        # Loops marked as "False"
        self.false_loops = set()

        if len(self.nodes) > MAX_NODES:
            self.skipped_loops_analysis = True
            return

        self.__explore(entry, set(), set(), {}, None, set())
        self.roots = self.loops_set.keys() - self.rev_deps.keys()
        self.__prune_loops()

        if not bypass_false_search:
            self.__search_false_loops()
            self.__search_same_deep_equiv_loops()

        self.__update_loops()

        # Every address that belongs to at least one loop ...
        addresses_in_loops = set()
        for members in self.loops_set.values():
            addresses_in_loops.update(members)

        # ... and the remaining addresses, which are in no loop at all.
        self.not_in_loop = self.nodes.keys() - addresses_in_loops

        # Search infinite loops
        self.infinite_loop = set()
        for loop_key, loop_members in self.loops_all.items():
            if self.__is_inf_loop(loop_members):
                self.infinite_loop.add(loop_key)

        # Save first address of loops
        self.loops_start = {first for _, first in self.loops_all}

        # For each loop we search the last node that if we enter in it,
        # we are sure to return to the loop.
        self.last_loop_node = {}
        for loop_key, loop_members in self.loops_all.items():
            outer, first = loop_key
            self.last_loop_node[(outer, first)] = set()
            self.__search_last_loop_node(set(), outer, first, loop_members)

        duration = time() - t_begin
        debug__("Exploration: found %d loop(s) in %fs" % (len(self.loops_all), duration))
Beispiel #7
0
    def __init__(self, filename, raw_type=None, raw_base=None, raw_big_endian=None):
        """Open `filename` and instantiate the matching file-format loader.

        When `raw_type` is given, the raw loader is used (with `raw_base`
        and `raw_big_endian` forwarded to it) and no format detection is
        done. Otherwise `load_magic` is called and an ELF or PE loader is
        created according to `self.type`.

        Raises ExcFileFormat if `self.type` is neither T_BIN_ELF nor
        T_BIN_PE after `load_magic`.
        """
        self.__binary = None
        self.reverse_symbols = {}
        self.symbols = {}
        self.type = None

        # Identity test: None is a singleton and must not go through __ne__.
        if raw_type is not None:
            import lib.fileformat.raw as LIB_RAW
            self.__binary = LIB_RAW.Raw(filename, raw_type, raw_base, raw_big_endian)
            self.type = T_BIN_RAW
            return

        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()
        self.load_magic(filename)

        if self.type == T_BIN_ELF:
            import lib.fileformat.elf as LIB_ELF
            self.__binary = LIB_ELF.ELF(self, filename)
        elif self.type == T_BIN_PE:
            import lib.fileformat.pe as LIB_PE
            self.__binary = LIB_PE.PE(self, filename)
        else:
            raise ExcFileFormat()

        elapsed = time.perf_counter() - start
        debug__("Binary loaded in %fs" % elapsed)
Beispiel #8
0
 def load_symbols(self):
     """Load the static, then the dynamic symbols and log how many
     entries `self.symbols` holds and how long the loading took."""
     t_begin = time()
     loader = self.__binary
     loader.load_static_sym()
     loader.load_dyn_sym()
     duration = time() - t_begin
     debug__("Found %d symbols in %fs" % (len(self.symbols), duration))
Beispiel #9
0
    def __loop_detection(self, ctx, entry):
        """Detect the loops reachable from `entry` and fill the loop tables.

        Explores the graph, searches equivalent and false loops, removes
        the false loops from `self.loops_all`, then fills
        `self.infinite_loop`, `self.loops_start` and `self.last_loop_node`.
        The number of loops and the elapsed time are logged with `debug__`.

        `ctx` is not used by this method.
        """
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        waiting = {}
        self.__explore(entry, set(), set(), waiting, None, set())

        self.__search_equiv_loops()
        self.__search_false_loops()

        for k in self.false_loops:
            del self.loops_all[k]

        # Search infinite loops
        self.infinite_loop = set()
        for l_curr_loop, l_set in self.loops_all.items():
            if self.__is_inf_loop(l_set):
                self.infinite_loop.add(l_curr_loop)

        # Save first address of loops. The loop variable must not be named
        # `start`: that would overwrite the timer and corrupt the elapsed
        # time reported below.
        for _, l_start in self.loops_all:
            self.loops_start.add(l_start)

        # Search the last nodes which force looping
        for (l_prev_loop, l_start), l_set in self.loops_all.items():
            self.last_loop_node[(l_prev_loop, l_start)] = set()
            self.__search_last_loop_node(set(), l_prev_loop, l_start, l_set)

        elapsed = time.perf_counter() - start
        debug__("Exploration: found %d loop in %fs" %
                (len(self.loops_all), elapsed))
Beispiel #10
0
def generate_ast(ctx__, paths):
    """Build the AST for `paths` and run the registered processing passes.

    `ctx__` is stored in the module-level global `ctx`. The AST is created
    by `get_ast_branch`, then every callable in `ctx.libarch.registered`
    is invoked with `(ctx, ast)`. Colors are assigned when `ctx.color` is
    set. The duration of both phases is logged through `debug__`.

    Returns the processed AST.
    """
    global ctx
    ctx = ctx__

    # time.clock() was removed in Python 3.8; perf_counter replaces it.
    start = time.perf_counter()
    ast = get_ast_branch(ctx, paths)
    elapsed = time.perf_counter() - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast
    start = time.perf_counter()
    for func in ctx.libarch.registered:
        func(ctx, ast)
    elapsed = time.perf_counter() - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
Beispiel #11
0
    def decompile(self):
        """Decompile the code at `self.entry` and return the Output object.

        Builds and simplifies the graph, runs the loop detection and the
        AST generation (a second time with the false-loop search bypassed
        when the first AST did not end correctly), then renders the AST.
        Returns None when no graph could be built, or when ExcIfelse is
        raised in interactive mode; otherwise `die()` is called on
        ExcIfelse.
        """
        self.is_dump = False

        graph, pe_nb_new_syms = self.gctx.dis.get_graph(self.entry)
        self.gph = graph
        if graph is None:
            error("capstone can't disassemble here")
            return None

        self.gph.simplify()

        # New PE symbols were discovered: the loaded database is now dirty.
        database = self.gctx.db
        if database.loaded and pe_nb_new_syms:
            database.modified = True

        try:
            self.gph.loop_detection(self.entry)
            ast, correctly_ended = generate_ast(self)
            if not correctly_ended:
                debug__("Second try...")
                self.gph.loop_detection(self.entry, True)
                ast, _ = generate_ast(self)
            self.ast = ast
        except ExcIfelse as exc:
            error("can't have a ifelse here     %x" % exc.addr)
            if self.gctx.interactive_mode:
                return None
            die()

        rendered = self.gctx.libarch.output.Output(self)
        rendered._ast(self.entry, ast)
        self.output = rendered
        return rendered
Beispiel #12
0
    def pe_reverse_stripped_symbols(self, dis):
        """Delegate to the loader's `pe_reverse_stripped_symbols(dis)` and
        log the count it returns together with the elapsed time."""
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        n = self.__binary.pe_reverse_stripped_symbols(dis)

        elapsed = time.perf_counter() - start
        debug__("Found %d imported symbols (PE) in %fs" % (n, elapsed))
Beispiel #13
0
    def pe_reverse_stripped_symbols(self, dis):
        """Delegate to the loader's `pe_reverse_stripped_symbols(dis)` and
        log the count it returns together with the elapsed time."""
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        n = self.__binary.pe_reverse_stripped_symbols(dis)

        elapsed = time.perf_counter() - start
        debug__("Found %d imported symbols (PE) in %fs" % (n, elapsed))
Beispiel #14
0
 def __explore(self, entry):
     """Explore the graph from `entry` with `__rec_explore`, collecting
     the result in a new Paths object, and return that object."""
     paths = Paths()
     # time.clock() was removed in Python 3.8; perf_counter replaces it.
     start = time.perf_counter()
     self.__rec_explore(paths, [], entry)
     elapsed = time.perf_counter() - start
     debug__("Exploration: found %d paths and %d loop-paths in %fs" %
             (len(paths.paths), len(paths.looping), elapsed))
     return paths
Beispiel #15
0
 def __explore(self, entry):
     """Explore the graph from `entry` with `__rec_explore`, collecting
     the result in a new Paths object, and return that object."""
     paths = Paths()
     # time.clock() was removed in Python 3.8; perf_counter replaces it.
     start = time.perf_counter()
     self.__rec_explore(paths, [], entry)
     elapsed = time.perf_counter() - start
     debug__("Exploration: found %d paths and %d loop-paths in %fs" %
             (len(paths.paths), len(paths.looping), elapsed))
     return paths
Beispiel #16
0
    def load_extra(self):
        """Load static symbols, dynamic symbols and data sections through
        the file-format loader, then log the size of `self.symbols` and the
        elapsed time."""
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        self.__binary.load_static_sym()
        self.__binary.load_dyn_sym()
        self.__binary.load_data_sections()

        elapsed = time.perf_counter() - start
        debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
Beispiel #17
0
    def load_extra(self):
        """Load static symbols, dynamic symbols and data sections through
        the file-format loader, then log the size of `self.symbols` and the
        elapsed time."""
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        self.__binary.load_static_sym()
        self.__binary.load_dyn_sym()
        self.__binary.load_data_sections()

        elapsed = time.perf_counter() - start
        debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
Beispiel #18
0
    def __compute_nested(self):
        """Compute, for every loop index, the loops nested inside it.

        Fills `self.direct_nested_idx` and `self.nested_loops_idx`. A loop
        k2 is recorded under k1 when the first address of k2 appears in k1
        after k1's own first address. The nesting relation is then
        propagated until no set changes any more. The pseudo-index -1
        receives the loops without a parent (direct) and all loops (nested).
        """
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        nb_loops = len(self.loops)

        # One initialisation pass is enough (it used to be done twice).
        for k in range(nb_loops):
            self.nested_loops_idx[k] = set()
            self.direct_nested_idx[k] = set()

        has_parent_loop_idx = set()

        for k1, l1 in enumerate(self.loops):
            if k1 in self.marked:
                continue
            for addr in l1[1:]:
                # check if addr is a beginning of another loop
                for k2, l2 in enumerate(self.loops):
                    if k2 in self.marked or \
                            self.loops_set[k1] == self.loops_set[k2]:
                        continue
                    if l2[0] == addr:
                        self.direct_nested_idx[k1].add(k2)
                        self.nested_loops_idx[k1].add(k2)
                        has_parent_loop_idx.add(k2)

        # Warning : sometimes a sub-nested-loop didn't appear in a
        # parent-parent-loop. So we search for new nested.
        # See tests/nestedloop5 :
        # the path of the third loop is not in the first one

        while True:
            moved = False
            for parent in self.nested_loops_idx:
                l_par = self.nested_loops_idx[parent]
                # Iterate over a copy: l_par grows inside the loop.
                for nest in list(l_par):
                    for subnest in self.nested_loops_idx[nest]:
                        if subnest not in l_par:
                            l_par.add(subnest)
                            has_parent_loop_idx.add(subnest)
                            moved = True
            if not moved:
                break

        self.direct_nested_idx[-1] = set(range(nb_loops)) - has_parent_loop_idx
        self.nested_loops_idx[-1] = set(range(nb_loops))

        elapsed = time.perf_counter() - start
        debug__("Nested loops computed in %fs" % elapsed)
Beispiel #19
0
    def __compute_nested(self):
        """Compute, for every loop index, the loops nested inside it.

        Fills `self.direct_nested_idx` and `self.nested_loops_idx`. A loop
        k2 is recorded under k1 when the first address of k2 appears in k1
        after k1's own first address. The nesting relation is then
        propagated until no set changes any more. The pseudo-index -1
        receives the loops without a parent (direct) and all loops (nested).
        """
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        nb_loops = len(self.loops)

        # One initialisation pass is enough (it used to be done twice).
        for k in range(nb_loops):
            self.nested_loops_idx[k] = set()
            self.direct_nested_idx[k] = set()

        has_parent_loop_idx = set()

        for k1, l1 in enumerate(self.loops):
            if k1 in self.marked:
                continue
            for addr in l1[1:]:
                # check if addr is a beginning of another loop
                for k2, l2 in enumerate(self.loops):
                    if k2 in self.marked or \
                            self.loops_set[k1] == self.loops_set[k2]:
                        continue
                    if l2[0] == addr:
                        self.direct_nested_idx[k1].add(k2)
                        self.nested_loops_idx[k1].add(k2)
                        has_parent_loop_idx.add(k2)

        # Warning : sometimes a sub-nested-loop didn't appear in a
        # parent-parent-loop. So we search for new nested.
        # See tests/nestedloop5 :
        # the path of the third loop is not in the first one

        while True:
            moved = False
            for parent in self.nested_loops_idx:
                l_par = self.nested_loops_idx[parent]
                # Iterate over a copy: l_par grows inside the loop.
                for nest in list(l_par):
                    for subnest in self.nested_loops_idx[nest]:
                        if subnest not in l_par:
                            l_par.add(subnest)
                            has_parent_loop_idx.add(subnest)
                            moved = True
            if not moved:
                break

        self.direct_nested_idx[-1] = set(range(nb_loops)) - has_parent_loop_idx
        self.nested_loops_idx[-1] = set(range(nb_loops))

        elapsed = time.perf_counter() - start
        debug__("Nested loops computed in %fs" % elapsed)
Beispiel #20
0
    def __loop_detection(self, ctx, entry):
        """Detect the loops reachable from `entry` and fill the loop tables.

        After the exploration and the equivalent-loop search an
        intermediate timing is logged and `html_graph` is called. False
        loops are then removed from `self.loops_all` and `self.loops_set`,
        and `self.not_in_loop`, `self.infinite_loop`, `self.loops_start`
        and `self.last_loop_node` are filled. `ctx` is not used.
        """
        t_begin = time()

        self.__explore(entry, set(), set(), {}, None, set())
        self.__search_equiv_loops()

        duration = time() - t_begin
        debug__("Exploration: found %d loop(s) in %fs" %
                (len(self.loops_all), duration))

        self.html_graph([])

        self.__search_false_loops()

        # Drop the false loops from both tables.
        for false_key in self.false_loops:
            del self.loops_all[false_key]
            del self.loops_set[false_key]

        # Compute all address which are not in a loop
        addresses_in_loops = set()
        for members in self.loops_set.values():
            addresses_in_loops.update(members)
        self.not_in_loop = self.nodes.keys() - addresses_in_loops

        # Search infinite loops
        self.infinite_loop = set()
        for loop_key, loop_members in self.loops_all.items():
            if self.__is_inf_loop(loop_members):
                self.infinite_loop.add(loop_key)

        # Save first address of loops
        self.loops_start.update(first for _, first in self.loops_all)

        # Search the last nodes which force looping
        for loop_key, loop_members in self.loops_all.items():
            outer, first = loop_key
            self.last_loop_node[(outer, first)] = set()
            self.__search_last_loop_node(set(), outer, first, loop_members)

        duration = time() - t_begin
        debug__("Exploration: found %d loop(s) in %fs" %
                (len(self.loops_all), duration))
Beispiel #21
0
    def __loop_detection(self, ctx, entry):
        """Detect the loops reachable from `entry` and fill the loop tables.

        After the exploration and the equivalent-loop search an
        intermediate timing is logged and `html_graph` is called. False
        loops are then removed from `self.loops_all` and `self.loops_set`,
        and `self.not_in_loop`, `self.infinite_loop`, `self.loops_start`
        and `self.last_loop_node` are filled. `ctx` is not used.
        """
        t_begin = time()

        self.__explore(entry, set(), set(), {}, None, set())
        self.__search_equiv_loops()

        duration = time() - t_begin
        debug__("Exploration: found %d loop(s) in %fs" %
                (len(self.loops_all), duration))

        self.html_graph([])

        self.__search_false_loops()

        # Drop the false loops from both tables.
        for false_key in self.false_loops:
            del self.loops_all[false_key]
            del self.loops_set[false_key]

        # Compute all address which are not in a loop
        addresses_in_loops = set()
        for members in self.loops_set.values():
            addresses_in_loops.update(members)
        self.not_in_loop = self.nodes.keys() - addresses_in_loops

        # Search infinite loops
        self.infinite_loop = set()
        for loop_key, loop_members in self.loops_all.items():
            if self.__is_inf_loop(loop_members):
                self.infinite_loop.add(loop_key)

        # Save first address of loops
        self.loops_start.update(first for _, first in self.loops_all)

        # Search the last nodes which force looping
        for loop_key, loop_members in self.loops_all.items():
            outer, first = loop_key
            self.last_loop_node[(outer, first)] = set()
            self.__search_last_loop_node(set(), outer, first, loop_members)

        duration = time() - t_begin
        debug__("Exploration: found %d loop(s) in %fs" %
                (len(self.loops_all), duration))
Beispiel #22
0
    def __simplify(self):
        """Fuse each node with its single predecessor where possible.

        A node is merged into its predecessor when neither starts with a
        jump instruction, the node has exactly one incoming link, the
        predecessor has exactly one outgoing link, and the node is not the
        entry point. The instruction lists are concatenated and all links
        are rewired to the predecessor.
        """
        ARCH_UTILS = self.dis.load_arch_module().utils
        # Snapshot of the keys: self.nodes is modified during the loop.
        nodes = list(self.nodes.keys())
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        for ad in nodes:
            inst = self.nodes[ad]
            if ARCH_UTILS.is_jump(inst[0]):
                continue

            if ad not in self.link_in or len(self.link_in[ad]) != 1 or \
                    ad == self.entry_point_addr:
                continue

            pred = self.link_in[ad][0]

            # don't fuse with jumps
            if ARCH_UTILS.is_jump(self.nodes[pred][0]):
                continue

            if pred not in self.link_out or len(self.link_out[pred]) != 1:
                continue

            # The predecessor inherits the successors of the fused node.
            if ad in self.link_out:
                self.link_out[pred] = self.link_out[ad]
            else:
                del self.link_out[pred]

            self.nodes[pred] += self.nodes[ad]

            if ad in self.link_out:
                del self.link_out[ad]

            del self.link_in[ad]
            del self.nodes[ad]

            # replace all addresses which refer to ad
            for lst_i in self.link_in.values():
                try:
                    lst_i[lst_i.index(ad)] = pred
                except ValueError:
                    # ad is not a predecessor in this list
                    pass

        elapsed = time.perf_counter() - start
        debug__("Graph simplified in %fs" % elapsed)
Beispiel #23
0
    def __simplify(self):
        """Fuse each node with its single predecessor where possible.

        A node is merged into its predecessor when neither starts with a
        jump instruction, the node has exactly one incoming link, the
        predecessor has exactly one outgoing link, and the node is not the
        entry point. The instruction lists are concatenated and all links
        are rewired to the predecessor.
        """
        ARCH_UTILS = self.dis.load_arch_module().utils
        # Snapshot of the keys: self.nodes is modified during the loop.
        nodes = list(self.nodes.keys())
        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()

        for ad in nodes:
            inst = self.nodes[ad]
            if ARCH_UTILS.is_jump(inst[0]):
                continue

            if ad not in self.link_in or len(self.link_in[ad]) != 1 or \
                    ad == self.entry_point_addr:
                continue

            pred = self.link_in[ad][0]

            # don't fuse with jumps
            if ARCH_UTILS.is_jump(self.nodes[pred][0]):
                continue

            if pred not in self.link_out or len(self.link_out[pred]) != 1:
                continue

            # The predecessor inherits the successors of the fused node.
            if ad in self.link_out:
                self.link_out[pred] = self.link_out[ad]
            else:
                del self.link_out[pred]

            self.nodes[pred] += self.nodes[ad]

            if ad in self.link_out:
                del self.link_out[ad]

            del self.link_in[ad]
            del self.nodes[ad]

            # replace all addresses which refer to ad
            for lst_i in self.link_in.values():
                try:
                    lst_i[lst_i.index(ad)] = pred
                except ValueError:
                    # ad is not a predecessor in this list
                    pass

        elapsed = time.perf_counter() - start
        debug__("Graph simplified in %fs" % elapsed)
Beispiel #24
0
    def simplify(self):
        """Fuse each node with its single predecessor where possible.

        A node is merged into its predecessor when neither address is in
        the jump sets, the node has exactly one incoming link, the
        predecessor has exactly one outgoing link, and the node is not the
        entry point. Logs the elapsed time and the remaining node count.
        """
        candidates = list(self.nodes.keys())
        t_begin = time()

        link_in = self.link_in
        link_out = self.link_out

        for ad in candidates:
            if ad in self.uncond_jumps_set or ad in self.cond_jumps_set:
                continue

            single_pred = ad in link_in and len(link_in[ad]) == 1
            if not single_pred or ad == self.entry_point_addr:
                continue

            pred = link_in[ad][0]

            # don't fuse with jumps
            if pred in self.uncond_jumps_set or pred in self.cond_jumps_set:
                continue

            if not (pred in link_out and len(link_out[pred]) == 1):
                continue

            # Append the instructions of `ad` to its predecessor.
            self.nodes[pred] += self.nodes[ad]

            # The predecessor takes over the outgoing links of `ad`.
            if ad in link_out:
                link_out[pred] = link_out[ad]
                del link_out[ad]
            else:
                del link_out[pred]

            del link_in[ad]
            del self.nodes[ad]

            # Every incoming-link list that mentions `ad` now points to pred.
            for preds in link_in.values():
                if ad in preds:
                    preds[preds.index(ad)] = pred

        duration = time() - t_begin
        debug__("Graph simplified in %fs (%d nodes)" %
                (duration, len(self.nodes)))
Beispiel #25
0
def disasm(ctx):
    """Decompile the code at `ctx.entry_addr` and return the Output object.

    Builds and simplifies the graph, runs the loop detection and the AST
    generation (a second time with the extra `True` argument when the
    first AST did not end correctly), optionally emits the dot graph and
    the vim syntax files, then renders the AST.

    Returns None when no graph could be built, or when ExcIfelse is raised
    in interactive mode; otherwise `die()` is called on ExcIfelse.
    """
    ctx.gph, pe_nb_new_syms = ctx.dis.get_graph(ctx.entry_addr)

    # Identity test: None is a singleton and must not go through __eq__.
    if ctx.gph is None:
        error("capstone can't disassemble here")
        return None
    ctx.gph.simplify()

    if ctx.db.loaded and pe_nb_new_syms:
        ctx.db.modified = True

    try:
        ctx.gph.loop_detection(ctx, ctx.entry_addr)
        ast, correctly_ended = generate_ast(ctx)
        if not correctly_ended:
            debug__("Second try...")
            ctx.gph.loop_detection(ctx, ctx.entry_addr, True)
            ast, _ = generate_ast(ctx)
    except ExcIfelse as e:
        error("can't have a ifelse here     %x" % e.addr)
        if ctx.interactive_mode:
            return None
        die()

    if ctx.graph:
        ctx.gph.dot_graph(ctx.dis.jmptables)

    if ctx.vim:
        base = os.path.basename(ctx.filename) + "_" + ctx.entry
        # re-assign if no colors
        ctx.libarch.process_ast.assign_colors(ctx, ast)
        ctx.color = False
        generate_vim_syntax(ctx, base + ".vim")
        # stdout is redirected to the .rev file from here on; the file
        # object is not closed in this function.
        sys.stdout = open(base + ".rev", "w+")

    o = ctx.libarch.output.Output(ctx)
    o._ast(ctx.entry_addr, ast)

    if ctx.vim:
        print("Run :  vim {0}.rev -S {0}.vim".format(base), file=sys.stderr)

    return o
Beispiel #26
0
def disasm(ctx):
    """Decompile the code at `ctx.entry_addr` and return the Output object.

    Builds and simplifies the graph, runs the loop detection and the AST
    generation (a second time with the extra `True` argument when the
    first AST did not end correctly), optionally emits the dot graph and
    the vim syntax files, then renders the AST.

    Returns None when no graph could be built, or when ExcIfelse is raised
    in interactive mode; otherwise `die()` is called on ExcIfelse.
    """
    ctx.gph, pe_nb_new_syms = ctx.dis.get_graph(ctx.entry_addr)

    # Identity test: None is a singleton and must not go through __eq__.
    if ctx.gph is None:
        error("capstone can't disassemble here")
        return None
    ctx.gph.simplify()

    if ctx.db.loaded and pe_nb_new_syms:
        ctx.db.modified = True

    try:
        ctx.gph.loop_detection(ctx, ctx.entry_addr)
        ast, correctly_ended = generate_ast(ctx)
        if not correctly_ended:
            debug__("Second try...")
            ctx.gph.loop_detection(ctx, ctx.entry_addr, True)
            ast, _ = generate_ast(ctx)
    except ExcIfelse as e:
        error("can't have a ifelse here     %x" % e.addr)
        if ctx.interactive_mode:
            return None
        die()

    if ctx.graph:
        ctx.gph.dot_graph(ctx.dis.jmptables)

    if ctx.vim:
        base = os.path.basename(ctx.filename) + "_" + ctx.entry
        # re-assign if no colors
        ctx.libarch.process_ast.assign_colors(ctx, ast)
        ctx.color = False
        generate_vim_syntax(ctx, base + ".vim")
        # stdout is redirected to the .rev file from here on; the file
        # object is not closed in this function.
        sys.stdout = open(base + ".rev", "w+")

    o = ctx.libarch.output.Output(ctx)
    o._ast(ctx.entry_addr, ast)

    if ctx.vim:
        print("Run :  vim {0}.rev -S {0}.vim".format(base), file=sys.stderr)

    return o
Beispiel #27
0
    def simplify(self):
        """Fuse each node with its single predecessor where possible.

        A node is merged into its predecessor when neither address is in
        the jump sets, the node has exactly one incoming link, the
        predecessor has exactly one outgoing link, and the node is not the
        entry point. Logs the elapsed time and the remaining node count.
        """
        candidates = list(self.nodes.keys())
        t_begin = time()

        link_in = self.link_in
        link_out = self.link_out

        for ad in candidates:
            if ad in self.uncond_jumps_set or ad in self.cond_jumps_set:
                continue

            single_pred = ad in link_in and len(link_in[ad]) == 1
            if not single_pred or ad == self.entry_point_addr:
                continue

            pred = link_in[ad][0]

            # don't fuse with jumps
            if pred in self.uncond_jumps_set or pred in self.cond_jumps_set:
                continue

            if not (pred in link_out and len(link_out[pred]) == 1):
                continue

            # Append the instructions of `ad` to its predecessor.
            self.nodes[pred] += self.nodes[ad]

            # The predecessor takes over the outgoing links of `ad`.
            if ad in link_out:
                link_out[pred] = link_out[ad]
                del link_out[ad]
            else:
                del link_out[pred]

            del link_in[ad]
            del self.nodes[ad]

            # Every incoming-link list that mentions `ad` now points to pred.
            for preds in link_in.values():
                if ad in preds:
                    preds[preds.index(ad)] = pred

        duration = time() - t_begin
        debug__("Graph simplified in %fs (%d nodes)" % (duration, len(self.nodes)))
Beispiel #28
0
    def __init__(self, filename, raw_type=None):
        """Open `filename`, instantiate the matching loader and load symbols.

        When `raw_type` is given, the raw loader is used and the method
        returns without format detection or symbol loading. Otherwise
        `load_magic` is called, an ELF or PE loader is created according to
        `self.type`, and static symbols, dynamic symbols and data sections
        are loaded. Both phases log their duration through `debug__`.

        Raises ExcFileFormat if `self.type` is neither T_BIN_ELF nor
        T_BIN_PE after `load_magic`.
        """
        self.__binary = None
        self.reverse_symbols = {}
        self.symbols = {}
        self.type = None

        # Identity test: None is a singleton and must not go through __ne__.
        if raw_type is not None:
            import lib.fileformat.raw as LIB_RAW
            self.__binary = LIB_RAW.Raw(filename, raw_type)
            self.type = T_BIN_RAW
            return

        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()
        self.load_magic(filename)

        if self.type == T_BIN_ELF:
            import lib.fileformat.elf as LIB_ELF
            self.__binary = LIB_ELF.ELF(self, filename)
        elif self.type == T_BIN_PE:
            import lib.fileformat.pe as LIB_PE
            self.__binary = LIB_PE.PE(self, filename)
        else:
            raise ExcFileFormat()

        elapsed = time.perf_counter() - start
        debug__("Binary loaded in %fs" % elapsed)

        start = time.perf_counter()

        self.__binary.load_static_sym()
        self.__binary.load_dyn_sym()
        self.__binary.load_data_sections()

        elapsed = time.perf_counter() - start
        debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
Beispiel #29
0
    def __init__(self, filename, raw_type=None):
        """Open `filename`, instantiate the matching loader and load symbols.

        When `raw_type` is given, the raw loader is used and the method
        returns without format detection or symbol loading. Otherwise
        `load_magic` is called, an ELF or PE loader is created according to
        `self.type`, and static symbols, dynamic symbols and data sections
        are loaded. Both phases log their duration through `debug__`.

        Raises ExcFileFormat if `self.type` is neither T_BIN_ELF nor
        T_BIN_PE after `load_magic`.
        """
        self.__binary = None
        self.reverse_symbols = {}
        self.symbols = {}
        self.type = None

        # Identity test: None is a singleton and must not go through __ne__.
        if raw_type is not None:
            import lib.fileformat.raw as LIB_RAW
            self.__binary = LIB_RAW.Raw(filename, raw_type)
            self.type = T_BIN_RAW
            return

        # time.clock() was removed in Python 3.8; perf_counter replaces it.
        start = time.perf_counter()
        self.load_magic(filename)

        if self.type == T_BIN_ELF:
            import lib.fileformat.elf as LIB_ELF
            self.__binary = LIB_ELF.ELF(self, filename)
        elif self.type == T_BIN_PE:
            import lib.fileformat.pe as LIB_PE
            self.__binary = LIB_PE.PE(self, filename)
        else:
            raise ExcFileFormat()

        elapsed = time.perf_counter() - start
        debug__("Binary loaded in %fs" % elapsed)

        start = time.perf_counter()

        self.__binary.load_static_sym()
        self.__binary.load_dyn_sym()
        self.__binary.load_data_sections()

        elapsed = time.perf_counter() - start
        debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
Beispiel #30
0
def generate_ast(ctx__):
    """Build the AST of a function from its control-flow graph.

    `ctx__` is stored in the module-level `ctx`. The graph `ctx.gph` is
    walked from `ctx.entry` with an explicit stack of work items
    `(ast, loops_stack, prev, curr, else_addr)`. Along the way the walk
    creates Ast_Loop, Ast_IfGoto, Ast_AndIf, Ast_Ifelse and Ast_Goto nodes
    and appends graph blocks to the current branch. Once the walk is over,
    gotos are cleaned up, every function in `ctx.gctx.libarch.registered`
    is applied to the tree, and colors are assigned if `ctx.gctx.color`
    is set.

    Returns:
        A tuple `(ast, ok)`. `ast` is the root Ast_Branch. `ok` is False
        when entries remain in `waiting` at the end (a warning comment is
        then inserted at the head of the tree), True otherwise.

    Raises:
        ExcIfelse: inside a loop, when both successors of a conditional
            jump are outside the current loop set.
    """
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    # Work items: (ast, loops_stack, prev, curr, else_addr)
    stack = [(ast, [], -1, ctx.entry, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    # Placeholder branch with the highest possible level; it is pushed
    # instead of a real branch for the and-if case and for forced restarts.
    fake_br = Ast_Branch()
    fake_br.level = sys.maxsize

    while stack or waiting:

        # The stack is empty but some nodes are still waiting: stop, unless
        # the loop analysis was skipped, in which case every waiting node is
        # forced onto the stack.
        if not stack and waiting:
            if not ctx.gph.skipped_loops_analysis:
                break
            for ad in set(waiting):
                waiting[ad].unseen.clear()
                stack.append((fake_br, [], -1, ad, -1))

        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        # Outside of any loop: use the function entry as the loop start.
        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            if (l_start, curr) in ctx.gph.loops_all:
                # Give the loop head a symbol if it does not have one yet.
                if curr not in ctx.gctx.db.reverse_symbols:
                    name = "loop_0x%x" % curr
                    ctx.gctx.db.symbols[name] = curr
                    ctx.gctx.db.reverse_symbols[curr] = name
                    ctx.gctx.db.modified = True

                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True
            # Here curr may has changed

        # Already emitted node: add a goto to it where needed instead of
        # emitting it again.
        if curr in visited:
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            if curr != ctx.entry and curr not in ctx.gctx.db.reverse_symbols:
                name = "ret_0x%x" % curr
                ctx.gctx.db.symbols[name] = curr
                ctx.gctx.db.reverse_symbols[curr] = name
                ctx.gctx.db.modified = True
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        # Jump table: every successor continues in the same branch.
        if curr in ctx.gctx.dis.jmptables:
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                # c1 / c2: the corresponding successor is outside the
                # current loop.
                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.gctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr, l_set,
                                       l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    # If we have already seen this address (for example the
                    # endpoint is the beginning of the current loop) we don't
                    # re-add in the waiting list.
                    if endpoint not in visited:
                        manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                        l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                # Real if/else: the jump target is explored in ast_else.
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            # Single successor: keep going in the same branch.
            ast.add(blk)
            stack.append((ast, loops_stack, curr, nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.gctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.gctx.color:
        ctx.gctx.libarch.process_ast.assign_colors(ctx, ast)

    # Nodes still waiting mean the traversal did not cover everything:
    # flag the output as incomplete.
    if waiting:
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(
            0,
            Ast_Comment("WARNING: there is a bug, the output is incomplete !"))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        return ast, False

    return ast, True
Beispiel #31
0
    def loop_detection(self, entry, bypass_false_search=False):
        """Find the loops reachable from `entry` and fill the loop tables.

        All loop-related attributes are reset first. If the graph has more
        than MAX_NODES nodes, `skipped_loops_analysis` is set and the method
        returns right after the reset. Otherwise the graph is explored, the
        loops are pruned and updated, and the derived tables (`not_in_loop`,
        `infinite_loop`, `loops_start`, `last_loop_node`) are computed.

        Args:
            entry: address the exploration starts from.
            bypass_false_search: when true, the search for false loops and
                for equivalent loops at the same depth is not run.
        """
        t_begin = time()

        self.deep_equiv = set()   # equivalent loops at the same depth of the dependency tree
        self.loops_set = {}       # per loop: addresses of the loop only
        self.loops_all = {}       # per loop: addresses of the loop and its sub-loops
        self.deps = {}            # loop dependencies
        self.rev_deps = {}
        self.false_loops = set()  # loops marked as "False"

        too_many_nodes = len(self.nodes) > MAX_NODES
        if too_many_nodes:
            self.skipped_loops_analysis = True
            return

        self.__explore(entry, set(), set(), {}, None, set())
        self.roots = self.loops_set.keys() - self.rev_deps.keys()
        self.__prune_loops()

        if not bypass_false_search:
            self.__search_false_loops()
            self.__search_same_deep_equiv_loops()

        self.__update_loops()

        # Every address that belongs to at least one loop ...
        in_loop = set().union(*self.loops_set.values())
        # ... and the remaining addresses of the graph.
        self.not_in_loop = self.nodes.keys() - in_loop

        # Infinite loops
        self.infinite_loop = set()
        for loop_key in self.loops_all:
            if self.__is_inf_loop(self.loops_all[loop_key]):
                self.infinite_loop.add(loop_key)

        # First address of every loop
        self.loops_start = {head for _, head in self.loops_all}

        # For each loop, search the last node that, once entered,
        # guarantees a return to the loop.
        self.last_loop_node = {}
        for loop_key, members in self.loops_all.items():
            outer, head = loop_key
            self.last_loop_node[loop_key] = set()
            self.__search_last_loop_node(set(), outer, head, members)

        elapsed = time() - t_begin
        debug__("Exploration: found %d loop(s) in %fs" %
                (len(self.loops_all), elapsed))
Beispiel #32
0
def generate_ast(ctx__):
    """Build the AST of a function from its control-flow graph.

    `ctx__` is stored in the module-level `ctx`. The graph `ctx.gph` is
    walked from `ctx.entry_addr` with an explicit stack of work items
    `(ast, loops_stack, prev, curr, else_addr)`. Along the way the walk
    creates Ast_Loop, Ast_IfGoto, Ast_AndIf, Ast_Ifelse and Ast_Goto nodes
    and appends graph blocks to the current branch. Loop heads and
    return blocks get an entry in `ctx.labels`. Once the walk is over,
    gotos are cleaned up, every function in `ctx.libarch.registered` is
    applied to the tree, and colors are assigned if `ctx.color` is set.

    Returns:
        The root Ast_Branch.

    Raises:
        ExcIfelse: inside a loop, when both successors of a conditional
            jump are outside the current loop set.
    """
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    # Work items: (ast, loops_stack, prev, curr, else_addr)
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we have already an other equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        # Outside of any loop: use the function entry as the loop start.
        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue
            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check that it is not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if (l_prev_loop, curr) in ctx.gph.equiv and \
                        (l_prev_loop, l_start) in ctx.gph.equiv[(l_prev_loop, curr)]:
                        is_new_loop = False

            if is_new_loop:
                ctx.labels[curr] = "loop_0x%x" % curr
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True
            # Here curr may has changed

        # Already emitted node: add a goto to it where needed instead of
        # emitting it again.
        if curr in visited:
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ctx.labels[curr] = "ret_0x%x" % curr
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        # Jump table: every successor continues in the same branch.
        if curr in ctx.dis.jmptables:
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                # c1 / c2: the corresponding successor is outside the
                # current loop.
                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            # An endpoint that is a false loop relative to the current loop
            # start is discarded; otherwise it is registered with
            # manage_endpoint.
            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append((ast_if, list(loops_stack), curr,
                          nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                # Real if/else: the jump target is explored in ast_else.
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            # Single successor: keep going in the same branch.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))


    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
Beispiel #33
0
    def get_graph(self, addr):
        """Build the flow graph of the code reachable from `addr`.

        Instructions are disassembled lazily. The successors of each newly
        met instruction are linked in the graph and queued for exploration.
        The jump target is assumed to be the last operand
        (`operands[-1]`) on every architecture.

        Args:
            addr: address of the first instruction.

        Returns:
            The Graph, or None when the instruction at `addr` cannot be
            disassembled.
        """
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        ARCH_UTILS = self.load_arch_module().utils

        curr = self.lazy_disasm(addr)
        if curr is None:
            return None

        gph = Graph(self, addr)
        rest = []
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed intervals.
        start = time.perf_counter()
        prefetch = None

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        while True:
            if not gph.exists(curr):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(curr)

                if ARCH_UTILS.is_uncond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[-1].type == CS_OP_IMM:
                        target = curr.operands[-1].value.imm
                        nxt = self.lazy_disasm(target)
                        gph.set_next(curr, nxt, prefetch)
                        rest.append(nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr, prefetch)
                    gph.uncond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_cond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[-1].type == CS_OP_IMM:
                        nxt_jump = self.lazy_disasm(curr.operands[-1].value.imm)

                        # On MIPS the fall-through address is computed from
                        # the prefetched instruction, not from the jump.
                        if self.arch == CS_ARCH_MIPS:
                            direct_nxt = \
                                self.lazy_disasm(prefetch.address + prefetch.size)
                        else:
                            direct_nxt = \
                                self.lazy_disasm(curr.address + curr.size)

                        gph.set_cond_next(curr, nxt_jump, direct_nxt, prefetch)
                        rest.append(nxt_jump.address)
                        rest.append(direct_nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr, prefetch)
                    gph.cond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_ret(curr):
                    gph.add_node(curr, prefetch)

                else:
                    try:
                        nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    except Exception:
                        # Best effort: if the next instruction cannot be
                        # linked, keep the current one as a leaf. Unlike a
                        # bare `except:`, this lets KeyboardInterrupt and
                        # SystemExit through.
                        gph.add_node(curr)

            try:
                curr = self.lazy_disasm(rest.pop())
            except IndexError:
                # Nothing left to explore.
                break

        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        elapsed = time.perf_counter() - start
        debug__("Graph built in %fs" % elapsed)

        return gph
Beispiel #34
0
    def get_graph(self, entry_addr):
        """Build the instruction-level flow graph reachable from `entry_addr`.

        Each instruction becomes one node (blocks are created later, in
        __simplify). The jump target is assumed to be the last operand
        (`operands[-1]`) on every architecture.

        Returns:
            `(graph, nb_new_syms)`, or `(None, 0)` when the graph ends up
            with no node. `nb_new_syms` is the value returned by
            `pe_reverse_stripped_symbols` for PE binaries and 0 otherwise.
        """
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry_addr)
        pending = [entry_addr]
        t_begin = time()
        prefetch = None

        while pending:
            ad = pending.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Nothing could be disassembled here: drop every link that
                # points to this address.
                if ad in gph.link_in:
                    for pred in gph.link_in[ad]:
                        gph.link_out[pred].remove(ad)
                    for pred in gph.link_in[ad]:
                        if not gph.link_out[pred]:
                            del gph.link_out[pred]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            # Each branch below picks the prefetch instruction (`slot`) and
            # the successor list (`targets`) of the new node.
            if ARCH_UTILS.is_ret(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                slot = prefetch
                targets = None

            elif ARCH_UTILS.is_uncond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.uncond_jumps_set.add(ad)
                slot = prefetch
                operand = inst.operands[-1]
                if operand.type == CS_OP_IMM:
                    targets = [operand.value.imm]
                    pending.append(targets[0])
                elif inst.address in self.jmptables:
                    targets = self.jmptables[inst.address].table
                    pending += targets
                else:
                    # Can't interpret jmp ADDR|reg
                    targets = None

            elif ARCH_UTILS.is_cond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.cond_jumps_set.add(ad)
                slot = prefetch
                operand = inst.operands[-1]
                if operand.type == CS_OP_IMM:
                    # On MIPS the fall-through address is computed from the
                    # prefetched instruction, not from the jump.
                    if self.arch == CS_ARCH_MIPS:
                        fallthrough = prefetch.address + prefetch.size
                    else:
                        fallthrough = inst.address + inst.size
                    targets = [fallthrough, operand.value.imm]
                    pending.extend(targets)
                else:
                    # Can't interpret jmp ADDR|reg
                    targets = None

            else:
                # Any other instruction falls through to the next one.
                slot = None
                targets = [inst.address + inst.size]
                pending.append(targets[0])

            gph.new_node(inst, slot, targets)

        if not len(gph.nodes):
            return None, 0

        nb_new_syms = 0
        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_symbols(self)

        elapsed = time() - t_begin
        debug__("Graph built in %fs (%d instructions)" %
                (elapsed, len(gph.nodes)))

        return gph, nb_new_syms
Beispiel #35
0
    def get_graph(self, entry_addr):
        """Build the instruction-level flow graph reachable from `entry_addr`.

        Each instruction becomes one node (blocks are created later, in
        __simplify). The jump target is assumed to be the last operand
        (`operands[-1]`) on every architecture. The addresses of all added
        instructions, and of their prefetched instruction on MIPS, are
        collected and passed to `pe_reverse_stripped_symbols` for PE
        binaries.

        Returns:
            `(graph, nb_new_syms)`, or `(None, 0)` when the graph ends up
            with no node. `nb_new_syms` is 0 for non-PE binaries.
        """
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry_addr)
        pending = [entry_addr]
        t_begin = time()
        prefetch = None
        addresses = set()

        while pending:
            ad = pending.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Nothing could be disassembled here: drop every link that
                # points to this address.
                if ad in gph.link_in:
                    for pred in gph.link_in[ad]:
                        gph.link_out[pred].remove(ad)
                    for pred in gph.link_in[ad]:
                        if not gph.link_out[pred]:
                            del gph.link_out[pred]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            addresses.add(ad)

            # Each branch below picks the prefetch instruction (`slot`) and
            # the successor list (`targets`) of the new node.
            if ARCH_UTILS.is_ret(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                    addresses.add(prefetch.address)
                slot = prefetch
                targets = None

            elif ARCH_UTILS.is_uncond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                    addresses.add(prefetch.address)
                gph.uncond_jumps_set.add(ad)
                slot = prefetch
                operand = inst.operands[-1]
                if operand.type == CS_OP_IMM:
                    targets = [operand.value.imm]
                    pending.append(targets[0])
                elif inst.address in self.jmptables:
                    targets = self.jmptables[inst.address].table
                    pending += targets
                else:
                    # Can't interpret jmp ADDR|reg
                    targets = None

            elif ARCH_UTILS.is_cond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                    addresses.add(prefetch.address)
                gph.cond_jumps_set.add(ad)
                slot = prefetch
                operand = inst.operands[-1]
                if operand.type == CS_OP_IMM:
                    # On MIPS the fall-through address is computed from the
                    # prefetched instruction, not from the jump.
                    if self.arch == CS_ARCH_MIPS:
                        fallthrough = prefetch.address + prefetch.size
                    else:
                        fallthrough = inst.address + inst.size
                    targets = [fallthrough, operand.value.imm]
                    pending.extend(targets)
                else:
                    # Can't interpret jmp ADDR|reg
                    targets = None

            else:
                # Any other instruction falls through to the next one.
                slot = None
                targets = [inst.address + inst.size]
                pending.append(targets[0])

            gph.new_node(inst, slot, targets)

        if not len(gph.nodes):
            return None, 0

        nb_new_syms = 0
        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_symbols(self, addresses)

        elapsed = time() - t_begin
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

        return gph, nb_new_syms
Beispiel #36
0
def generate_ast(ctx__):
    """Build the abstract syntax tree for the graph stored in ``ctx__.gph``.

    The graph is walked iteratively with an explicit work stack. Each stack
    entry is a tuple ``(ast, loops_stack, prev, curr, else_addr)``:

    * ``ast``: the branch new nodes are appended to,
    * ``loops_stack``: list of ``(loop_ast, prev_loop_start, loop_start)``
      tuples describing the loops currently entered,
    * ``prev`` / ``curr``: address of the previous and the current node,
    * ``else_addr``: jump target of the enclosing if-else (``-1`` if none).

    Side effects: rebinds the module-level ``ctx`` to ``ctx__``, writes
    entries into ``ctx.labels``, runs every function in
    ``ctx.libarch.registered`` on the finished tree and, when ``ctx.color``
    is set, calls ``ctx.libarch.process_ast.assign_colors``.

    Args:
        ctx__: context object; this function reads ``entry_addr``, ``gph``,
            ``dis``, ``libarch``, ``labels``, ``print_andif`` and ``color``.

    Returns:
        The root ``Ast_Branch`` of the generated tree.

    Raises:
        ExcIfelse: when, inside a loop, both successors of a conditional
            jump are outside of the current loop set.
    """
    global ctx
    ctx = ctx__

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    waiting = {}

    # Keep a reference to the root: `ast` is rebound during the traversal.
    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we have already an other equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop: pop every loop that does not contain curr
        # and move back to the branch enclosing the popped loop.
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        # Not inside any loop: use the function-level defaults.
        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue
            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check that it's not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if (l_prev_loop, curr) in ctx.gph.equiv and \
                        (l_prev_loop, l_start) in ctx.gph.equiv[(l_prev_loop, curr)]:
                        is_new_loop = False

            if is_new_loop:
                ctx.labels[curr] = "loop_0x%x" % curr
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True
            # From here on, ast and the l_* loop variables may have changed.

        if curr in visited:
            # Jumping back to the head of the current loop needs no goto.
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                if not isinstance(ast.nodes[-1], list):
                    ast.add(Ast_Goto(curr))
                else:
                    # The last node is a block: a goto is only added when
                    # its first instruction is not an unconditional jump.
                    prev_inst = ast.nodes[-1][0]
                    if not ctx.libarch.utils.is_uncond_jump(prev_inst):
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ctx.labels[curr] = "ret_0x%x" % curr
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.dis.jmptables:
            # Jump table: every target continues in the same branch.
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                # c1 / c2: the corresponding successor leaves the loop.
                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    ast.add(a)

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    ast.add(a)

                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr, l_set,
                                       l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    # An endpoint that is a false loop is treated as
                    # "no endpoint".
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))
            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))
            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            a.parent = ast
            a.level = level + 1
            ast.add(a)

        else:
            # Single successor: keep going in the same branch.
            ast.add(blk)
            stack.append((ast, loops_stack, curr, nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    add_goto_after_alone_andif(ast)
    add_goto_if_inst_not_consecutives(ctx, ast)

    elapsed = time.perf_counter()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time.perf_counter()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time.perf_counter()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast