Ejemplo n.º 1
0
    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """Disassemble from ctx.entry and render it into an Output object.

        Walks the binary section by section, emitting code instructions,
        offset words, strings, arrays and raw data according to the type
        recorded in self.mem.

        ctx   : disassembly context; ctx.entry is the start address and
                ctx.gctx carries the global options (api, print_bytes, ...).
        lines : maximum number of output lines, used only when `until`
                is -1.
        until : exclusive stop address; -1 disables it (only the visual
                mode passes a real address here).

        Returns the arch-specific Output, or None when no section
        contains (or follows) the entry address.
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ARCH_UTILS = ARCH.utils

        ad = ctx.entry
        s = self.binary.get_section(ad)

        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.curr_section = s
        o.mode_dump = True
        # l counts the emitted logical lines, compared against `lines`.
        l = 0
        api = ctx.gctx.api

        # For mips: after a jump we add a newline, but for mips we should
        # add this newline after the prefetch instruction.
        prefetch_after_branch = False

        # Outer loop: one iteration per section crossed by the dump.
        while 1:
            # Print a section banner when entering a section at its start.
            if ad == s.start:
                if not o.last_2_lines_are_empty():
                    o._new_line()
                o._dash()
                o._section(s.name)
                o._add("  0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()

            # Inner loop: dump addresses of the current section until the
            # line/address budget is exhausted or the section ends.
            while ((l < lines and until == -1) or (ad < until and until != -1)) \
                    and ad <= s.end:

                ty = self.mem.get_type(ad)

                # A PE import should not be displayed as a subroutine
                if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                        and self.mem.is_code(ad):

                    is_func = ad in self.functions

                    if is_func:
                        if not o.last_2_lines_are_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()

                    i = self.lazy_disasm(ad, s.start)

                    # Visually separate basic blocks: a non-function
                    # address that is a jump target gets a blank line.
                    if not is_func and ad in self.xrefs and \
                            not o.last_2_lines_are_empty():
                        o._new_line()

                    o._asm_inst(i)

                    is_end = ad in self.end_functions

                    # mips
                    if prefetch_after_branch:
                        prefetch_after_branch = False
                        if not is_end:
                            o._new_line()

                    if is_end:
                        for fad in self.end_functions[ad]:
                            sy = api.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()

                    elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                        # On mips the blank line is deferred until after
                        # the delay-slot (prefetch) instruction.
                        if self.is_mips:
                            prefetch_after_branch = True
                        else:
                            o._new_line()

                    elif ARCH_UTILS.is_call(i):
                        op = i.operands[0]
                        if op.type == self.capstone.CS_OP_IMM:
                            imm = unsigned(op.value.imm)
                            # A call to a noreturn function ends the flow,
                            # so separate it like a jump/ret.
                            if imm in self.functions and self.is_noreturn(imm):
                                if self.is_mips:
                                    prefetch_after_branch = True
                                else:
                                    o._new_line()

                    ad += i.size

                # Data word holding an offset/address (word/dword/qword).
                elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    off = s.read_int(ad, sz)
                    if off is None:
                        # NOTE(review): `ad` is not advanced before this
                        # continue, so a failing read_int at a readable
                        # address would loop forever here — confirm
                        # read_int can only fail past the section end.
                        continue
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._data_prefix(sz)
                    o._add(" ")
                    o._imm(off,
                           sz,
                           True,
                           print_data=False,
                           force_dont_print_data=True)
                    o._new_line()
                    ad += sz

                # NUL-terminated string data.
                elif ty == MEM_ASCII:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    buf = self.binary.get_string(ad, sz)

                    if buf is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))

                        # Split the string into multi lines

                        splitted = buf.split("\n")

                        # j tracks the byte offset inside the string so
                        # each printed chunk gets its real address.
                        j = 0
                        for i, st in enumerate(splitted):
                            if i > 0 and len(st) != 0:
                                o._new_line()
                                o.set_line(ad + j)
                                o._address(ad + j)

                            # Wrap long lines every `bs` characters.
                            ibs = 0
                            bs = 65
                            while ibs < len(st):
                                if ibs > 0:
                                    o._new_line()
                                    o.set_line(ad + j)
                                    o._address(ad + j)

                                blk = st[ibs:ibs + bs]

                                if i < len(splitted) - 1 and ibs + bs >= len(
                                        st):
                                    # Last chunk before a newline: show the
                                    # \n escape and count its byte.
                                    o._string('"' + blk + '\\n"')
                                    j += len(blk) + 1
                                else:
                                    o._string('"' + blk + '"')
                                    j += len(blk)

                                ibs += bs

                    # Trailing NUL terminator.
                    o._add(", 0")
                    o._new_line()
                    ad += sz

                # Array of fixed-size entries.
                elif ty == MEM_ARRAY:
                    prefetch_after_branch = False
                    o._label_and_address(ad)

                    array_info = self.mem.mm[ad]
                    total_size = array_info[0]
                    entry_type = array_info[2]
                    entry_size = self.mem.get_size_from_type(entry_type)

                    n = int(total_size / entry_size)

                    o.set_line(ad)
                    o._data_prefix(entry_size)

                    k = 0
                    while k < total_size:
                        # Wrap the array dump when the line gets long.
                        if o.curr_index > 70:
                            o._new_line()
                            o.set_line(ad)
                            o._address(ad)
                            o._data_prefix(entry_size)
                            l += 1

                        val = s.read_int(ad, entry_size)
                        if MEM_WOFFSET <= entry_type <= MEM_QOFFSET:
                            o._add(" ")
                            o._imm(val,
                                   entry_size,
                                   True,
                                   print_data=False,
                                   force_dont_print_data=True)
                        else:
                            o._word(val, entry_size, is_from_array=True)

                        ad += entry_size
                        k += entry_size

                        if k < total_size:
                            o._add(",")

                    o._new_line()

                # Plain data word of the size implied by its type.
                else:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size_from_type(ty)
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._word(s.read_int(ad, sz), sz)
                    o._new_line()
                    ad += sz

                l += 1

            # The inner loop may have walked past the section: find the
            # section containing (or following) the new address.
            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                o._new_line()
                ad = s.start
                if until != -1 and ad >= until:
                    break

            if (l >= lines and until == -1) or (ad >= until and until != -1):
                break

            o.curr_section = s

        # Keep a trailing blank line when the dump stops exactly on a
        # jump target or a section start (visual-mode continuity).
        if until == ad:
            if self.mem.is_code(ad) and ad in self.xrefs or \
                    s is not None and ad == s.start:
                if not o.last_2_lines_are_empty():
                    o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o
Ejemplo n.º 2
0
    def get_graph(self, entry):
        """Build the instruction-level control-flow graph from `entry`.

        Every disassembled instruction becomes one node; basic blocks
        are only formed later by __simplify.  Returns the tuple
        (graph, number_of_new_symbols), or (None, 0) when nothing at
        all could be disassembled.
        """
        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry)
        t0 = time()
        prefetch = None
        pending = [entry]
        visited = set()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        while pending:
            addr = pending.pop()
            inst = self.lazy_disasm(addr)

            if inst is None:
                # Undecodable address: unlink every predecessor that
                # pointed here.
                if addr in gph.link_in:
                    for pred in gph.link_in[addr]:
                        gph.link_out[pred].remove(addr)
                    for pred in gph.link_in[addr]:
                        if not gph.link_out[pred]:
                            del gph.link_out[pred]
                    del gph.link_in[addr]
                continue

            if gph.exists(inst):
                continue

            visited.add(addr)

            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(visited, inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(visited, inst)
                gph.uncond_jumps_set.add(addr)
                target_op = inst.operands[-1]

                if target_op.type == self.capstone.CS_OP_IMM:
                    target = unsigned(target_op.value.imm)
                    if target in self.functions:
                        # Tail jump into another function: terminal node.
                        gph.new_node(inst, prefetch, None)
                    else:
                        pending.append(target)
                        gph.new_node(inst, prefetch, [target])
                elif inst.address in self.jmptables:
                    # Indirect jump with a known jump table: fan out.
                    table = self.jmptables[inst.address].table
                    pending.extend(table)
                    gph.new_node(inst, prefetch, table)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(visited, inst)
                gph.cond_jumps_set.add(addr)
                target_op = inst.operands[-1]

                if target_op.type != self.capstone.CS_OP_IMM:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)
                else:
                    # The not-taken successor starts after the prefetch
                    # (delay-slot) instruction when there is one.
                    if prefetch is None:
                        fallthrough = inst.address + inst.size
                    else:
                        fallthrough = prefetch.address + prefetch.size

                    taken = unsigned(target_op.value.imm)
                    pending.append(fallthrough)

                    if taken in self.functions:
                        gph.new_node(inst, prefetch, [fallthrough])
                    else:
                        pending.append(taken)
                        gph.new_node(inst, prefetch, [fallthrough, taken])

            else:
                if addr != entry and ARCH_UTILS.is_call(inst):
                    # TODO: like in the analyzer, simulate registers
                    # -> during the analysis, save in the database
                    # the immediate value.
                    call_op = inst.operands[0]
                    if call_op.type == self.capstone.CS_OP_IMM:
                        dest = unsigned(call_op.value.imm)
                        if dest in self.functions and self.is_noreturn(dest):
                            prefetch = self.__add_prefetch(visited, inst)
                            gph.new_node(inst, prefetch, None)
                            gph.exit_or_ret.add(addr)
                            continue

                    # Indirect call through a noreturn import: the flow
                    # stops here as well.
                    if call_op.type == self.capstone.CS_OP_MEM and \
                            call_op.mem.disp in self.binary.imports and \
                            self.binary.imports[call_op.mem.disp] & FUNC_FLAG_NORETURN:
                        prefetch = self.__add_prefetch(visited, inst)
                        gph.new_node(inst, prefetch, None)
                        gph.exit_or_ret.add(addr)
                        continue

                nxt = inst.address + inst.size
                pending.append(nxt)
                gph.new_node(inst, None, [nxt])

        if not gph.nodes:
            return None, 0

        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.reverse_stripped_list(self, visited)
        else:
            nb_new_syms = 0

        debug__("Graph built in %fs (%d instructions)" %
                (time() - t0, len(gph.nodes)))

        return gph, nb_new_syms
Ejemplo n.º 3
0
    def get_graph(self, entry):
        """Build the instruction-level control-flow graph rooted at `entry`.

        Each disassembled instruction becomes one graph node; basic
        blocks are created later by __simplify.

        Returns (graph, nb_new_symbols), or (None, 0) when nothing
        could be disassembled at all.
        """
        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry)
        # Worklist of addresses still to disassemble.
        stack = [entry]
        start = time()
        prefetch = None
        addresses = set()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            addresses.add(ad)

            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == self.capstone.CS_OP_IMM:
                    nxt = unsigned(op.value.imm)

                    if nxt in self.functions:
                        # Tail jump into another function: terminal node.
                        gph.new_node(inst, prefetch, None)
                    else:
                        stack.append(nxt)
                        gph.new_node(inst, prefetch, [nxt])

                else:
                    if inst.address in self.jmptables:
                        # Known jump table: fan out to every entry.
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == self.capstone.CS_OP_IMM:
                    # The fall-through successor starts after the
                    # prefetch (delay-slot) instruction when present.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = unsigned(op.value.imm)
                    stack.append(direct_nxt)

                    if nxt_jmp in self.functions:
                        gph.new_node(inst, prefetch, [direct_nxt])
                    else:
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            else:
                if ad != entry and ARCH_UTILS.is_call(inst):
                    op = inst.operands[0]
                    if op.type == self.capstone.CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        # A call to a noreturn function terminates the flow.
                        if imm in self.functions and self.is_noreturn(imm):
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            continue

                nxt = inst.address + inst.size
                stack.append(nxt)
                gph.new_node(inst, None, [nxt])

        if len(gph.nodes) == 0:
            return None, 0

        if self.binary.type == T_BIN_PE:
            # Try to name stripped functions discovered during the walk.
            nb_new_syms = self.binary.reverse_stripped_list(self, addresses)
        else:
            nb_new_syms = 0

        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

        return gph, nb_new_syms
Ejemplo n.º 4
0
    def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
        """Follow the control flow from `entry`, filling `inner_code`.

        func_obj    : function database entry (indexed with the FUNC_*
                      constants), or None when `entry` is not a function.
        entry       : start address of the analysis.
        inner_code  : dict address -> instruction, filled in place.
        add_if_code : when True, every xref added here is rolled back
                      if a bad (undecodable) instruction is reached.

        Returns False on a fatal error or a rolled-back bad flow,
        True otherwise.  When func_obj is given, its FUNC_FLAGS,
        FUNC_FRAME_SIZE and FUNC_ARGS_RESTORE entries are updated.
        """
        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        if add_if_code:
            added_xrefs = []

        regsctx = self.arch_analyzer.new_regs_context()
        if regsctx is None:
            # fatal error, but don't quit to let the user save the database
            return False

        flags = 0
        stack_err = False
        args_restore = 0
        if func_obj is not None:
            frame_size = func_obj[FUNC_FRAME_SIZE]
            if frame_size == -1:
                frame_size = self.ARCH_UTILS.guess_frame_size(self, entry)
                # used in arch/*/analyzer.c
                func_obj[FUNC_FRAME_SIZE] = frame_size
        else:
            frame_size = -1

        ret_found = False
        # Worklist of (register-context, address) pairs.
        stack = [(regsctx, entry)]

        while stack:
            (regsctx, ad) = stack.pop()

            if self.db.mem.is_data(ad):
                continue

            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            if self.gctx.debugsp:
                ALL_SP[ad] = self.arch_analyzer.get_sp(regsctx)

            inner_code[ad] = inst

            ##### RETURN #####
            if self.is_ret(inst):
                self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                ret_found = True

                # `ret imm` on x86 pops the caller's arguments (stdcall).
                if self.dis.is_x86 and len(inst.operands) == 1:
                    args_restore = inst.operands[0].value.imm
                    flags |= FUNC_FLAG_STDCALL

                # A non-zero stack pointer at ret means the push/pop
                # tracking went wrong somewhere.
                if self.arch_analyzer.get_sp(regsctx) != 0:
                    flags |= FUNC_FLAG_ERR_STACK_ANALYSIS

            ##### UNCONDITIONAL JUMP #####
            elif self.is_uncond_jump(inst):
                self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                op = inst.operands[-1]
                jmp_ad = None

                if op.type == self.ARCH_UTILS.OP_IMM:
                    jmp_ad = unsigned(op.value.imm)

                else:
                    is_jmptable = inst.address in self.jmptables

                    # Create a jumptable if necessary
                    if not is_jmptable:
                        if op.type == self.ARCH_UTILS.OP_REG:
                            jmp_ad = self.arch_analyzer.reg_value(
                                regsctx, op.value.reg)
                            if jmp_ad is None:
                                is_jmptable = self.auto_jump_table(
                                    inst, inner_code)

                        elif op.type == self.ARCH_UTILS.OP_MEM:
                            self.arch_analyzer.analyze_operands(
                                self, regsctx, inst, func_obj, False)
                            is_jmptable = self.auto_jump_table(
                                inst, inner_code)

                    if is_jmptable:
                        # Fan out: each table entry continues with a
                        # clone of the current register context.
                        table = self.jmptables[inst.address].table
                        for n in table:
                            r = self.arch_analyzer.clone_regs_context(regsctx)
                            stack.append((r, n))
                        self.api.add_xrefs_table(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                        continue

                    self.arch_analyzer.analyze_operands(
                        self, regsctx, inst, func_obj, False)
                    # TODO: assume there is return
                    if jmp_ad is None:
                        if entry in self.db.imports:
                            # NOTE(review): this ORs an int flag into a
                            # bool, so ret_found becomes truthy when the
                            # import is flagged noreturn — confirm the
                            # intended polarity.
                            ret_found |= self.db.imports[
                                entry] & FUNC_FLAG_NORETURN
                        else:
                            ret_found = True
                        continue

                self.api.add_xref(ad, jmp_ad)
                if self.db.mem.is_func(jmp_ad):
                    # Tail call into a known function: inherit its
                    # attributes instead of following it.
                    ret_found |= not self.is_func_noreturn(jmp_ad, entry)
                    fo = self.functions[jmp_ad]
                    flags = fo[FUNC_FLAGS]
                    frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                    args_restore = fo[FUNC_ARGS_RESTORE]
                else:
                    stack.append((regsctx, jmp_ad))
                if add_if_code:
                    added_xrefs.append((ad, jmp_ad))

            ##### CONDITIONAL JUMP #####
            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(regsctx, inst, func_obj,
                                               inner_code)

                op = inst.operands[-1]
                if op.type == self.ARCH_UTILS.OP_IMM:
                    # The fall-through successor starts after the
                    # prefetch (delay-slot) instruction when present.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = unsigned(op.value.imm)
                    self.api.add_xref(ad, nxt_jmp)

                    if self.db.mem.is_func(direct_nxt):
                        ret_found |= not self.is_func_noreturn(
                            direct_nxt, entry)
                        fo = self.functions[direct_nxt]
                        flags = fo[FUNC_FLAGS]
                        frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                        args_restore = fo[FUNC_ARGS_RESTORE]
                    else:
                        stack.append((regsctx, direct_nxt))

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if self.db.mem.is_func(nxt_jmp):
                        ret_found |= not self.is_func_noreturn(nxt_jmp, entry)
                    else:
                        # The taken branch gets its own context clone.
                        newctx = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((newctx, nxt_jmp))
                else:
                    self.arch_analyzer.analyze_operands(
                        self, regsctx, inst, func_obj, False)
                    # TODO : jump tables for conditional jumps ?

            ##### CALL #####
            elif self.is_call(inst):
                op = inst.operands[-1]
                call_ad = None
                sp_before = self.arch_analyzer.get_sp(regsctx)

                if op.type == self.ARCH_UTILS.OP_IMM:
                    call_ad = unsigned(op.value.imm)
                elif op.type == self.ARCH_UTILS.OP_REG:
                    # FIXME : for MIPS, addresses are loaded in t9 (generally)
                    # then jalr t9 is executed. The problem here is that we
                    # will analyze twice the function. The first time is done
                    # by the function analyze_imm.
                    call_ad = self.arch_analyzer.reg_value(
                        regsctx, op.value.reg)
                else:
                    self.arch_analyzer.analyze_operands(
                        self, regsctx, inst, func_obj, False)
                    # Indirect call through a noreturn function: stop.
                    if self.db.mem.is_func(op.mem.disp) and \
                            self.is_func_noreturn(op.mem.disp, entry):
                        self.__add_prefetch(regsctx, inst, func_obj,
                                            inner_code)
                        continue

                if call_ad is not None:
                    self.api.add_xref(ad, call_ad)

                    if add_if_code:
                        added_xrefs.append((ad, call_ad))

                    self.analyze_flow(call_ad,
                                      entry_is_func=True,
                                      force=False,
                                      add_if_code=add_if_code)

                    # TODO: if the address was alredy in the pending list
                    # we don't have a computed args size
                    # Reset the stack pointer to frame_size to handle stdcall.
                    if frame_size != -1 and call_ad in self.functions:
                        fo = self.functions[call_ad]
                        if fo is not None:
                            n = fo[FUNC_ARGS_RESTORE]
                            if n:
                                self.arch_analyzer.set_sp(
                                    regsctx, sp_before + n)

                    if self.db.mem.is_func(call_ad) and \
                            self.is_func_noreturn(call_ad, entry):
                        self.__add_prefetch(regsctx, inst, func_obj,
                                            inner_code)
                        continue

                # It seems it doesn't matter for the prefetched instruction
                nxt = inst.address + inst.size
                stack.append((regsctx, nxt))

            ##### OTHERS #####
            else:
                self.arch_analyzer.analyze_operands(self, regsctx, inst,
                                                    func_obj, False)

                nxt = inst.address + inst.size
                if nxt not in self.functions:
                    stack.append((regsctx, nxt))

        # Remove all xrefs, this is not a correct flow
        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                if isinstance(to_ad, list):
                    self.api.rm_xrefs_table(from_ad, to_ad)
                else:
                    self.api.rm_xref(from_ad, to_ad)
            return False

        if func_obj is not None:
            if entry in self.db.imports:
                if self.db.imports[entry] & FUNC_FLAG_NORETURN:
                    flags |= FUNC_FLAG_NORETURN
            elif not ret_found:
                flags |= FUNC_FLAG_NORETURN

            func_obj[FUNC_FLAGS] = flags
            func_obj[FUNC_FRAME_SIZE] = frame_size
            func_obj[FUNC_ARGS_RESTORE] = args_restore

        return True
Ejemplo n.º 5
0
    def _imm(self,
             imm,
             op_size,
             hexa,
             section=None,
             print_data=True,
             force_dont_print_data=False,
             is_from_jump=False):
        """Print the immediate `imm` as a label, an address, inline data
        or a plain literal, depending on what the database knows about it.
        """

        # capstone_string mode forces hexadecimal rendering.
        if self.gctx.capstone_string != 0:
            hexa = True

        if hexa:
            imm = unsigned(imm)

        label_printed = self._label(imm, print_colon=False)

        if label_printed:
            mem = self._dis.mem
            ty = mem.get_type(imm)

            # A head whose underlying data is a string is treated as the
            # string itself.
            if ty == MEM_HEAD and \
                    mem.get_type(mem.get_head_addr(imm)) == MEM_ASCII:
                ty = MEM_ASCII

            # ty == -1 : from the terminal (with -x) there are no xrefs if
            # the file was loaded without a database.
            if ty == -1 or (imm in self._dis.xrefs and
                            ty != MEM_UNK and ty != MEM_ASCII):
                return

            if ty == MEM_ASCII:
                print_data = True
                force_dont_print_data = False

        if section is None:
            section = self._binary.get_section(imm)

        # A section based at 0 cannot disambiguate addresses from values.
        if section is not None and section.start == 0:
            section = None

        # For a raw file, if the raw base is 0 the immediate is considered
        # as an address only if it's in the symbols list.
        raw_base_zero = self._binary.type == T_BIN_RAW and self.gctx.raw_base == 0

        if section is not None and not raw_base_zero:
            if not label_printed:
                self._address(imm, print_colon=False, notprefix=True)

            if print_data and not force_dont_print_data:
                data = self._binary.get_string(imm, self.gctx.max_data_size)
                if data is not None:
                    self._add(" ")
                    self._string('"' + data.replace("\n", "\\n") + '"')

            return

        if label_printed:
            return

        # Not an address: render as char / hex / signed decimal.
        if op_size == 1:
            if imm == 10:
                self._string("'\\n'")
            else:
                self._string("'%s'" % get_char(imm))
        elif hexa:
            text = hex(imm)
            if is_from_jump:
                self._error(text)
            else:
                self._add(text)
        else:
            if op_size == 2:
                self._add(str(c_short(imm).value))
            elif op_size == 4:
                self._add(str(c_int(imm).value))
            else:
                self._add(str(c_long(imm).value))

            # If the value packs into printable bytes, show them too.
            if imm > 0:
                if op_size == 4:
                    packed = struct.pack("<L", imm)
                elif op_size == 8:
                    packed = struct.pack("<Q", imm)
                else:
                    return
                if set(packed).issubset(BYTES_PRINTABLE_SET):
                    self._string(" \"" + "".join(map(chr, packed)) + "\"")

        return
Ejemplo n.º 6
0
    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """Disassemble and render memory starting at ctx.entry.

        Walks forward through sections, emitting at most `lines` output
        lines when `until == -1`, or stopping once the address `until`
        is reached (visual mode).  Code, pointer offsets, ASCII strings,
        arrays and raw words are each rendered with a dedicated branch.

        Returns an arch-specific Output object, or None when the entry
        address lies before the first section (non-visual mode only).
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ARCH_UTILS = ARCH.utils

        ad = ctx.entry
        s = self.binary.get_section(ad)

        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.curr_section = s
        o.mode_dump = True
        # l counts emitted output lines (the `lines` budget).
        l = 0
        api = ctx.gctx.api

        # For mips: after a jump we add a newline, but for mips we should
        # add this newline after the prefetch instruction.
        prefetch_after_branch = False

        while 1:
            # Print a section banner each time we land on a section start.
            if ad == s.start:
                if not o.is_last_2_line_empty():
                    o._new_line()
                o._dash()
                o._section(s.name)
                o._add("  0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()

            while ((l < lines and until == -1) or (ad < until and until != -1)) \
                    and ad <= s.end:

                ty = self.mem.get_type(ad)

                # A PE import should not be displayed as a subroutine
                if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                        and self.mem.is_code(ad):

                    is_func = ad in self.functions

                    if is_func:
                        # Header banner for a known function entry.
                        if not o.is_last_2_line_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()

                    i = self.lazy_disasm(ad, s.start)

                    # Blank line before a cross-referenced address that is
                    # not a function entry, to visually separate it.
                    if not is_func and ad in self.xrefs and \
                            not o.is_last_2_line_empty():
                        o._new_line()

                    o._asm_inst(i)

                    is_end = ad in self.end_functions

                    # mips
                    if prefetch_after_branch:
                        prefetch_after_branch = False
                        if not is_end:
                            o._new_line()

                    if is_end:
                        for fad in self.end_functions[ad]:
                            sy = api.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()

                    elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                        # On mips, delay the newline until after the
                        # prefetch (delay-slot) instruction.
                        if self.is_mips:
                            prefetch_after_branch = True
                        else:
                            o._new_line()

                    elif ARCH_UTILS.is_call(i):
                        op = i.operands[0]
                        if op.type == self.capstone.CS_OP_IMM:
                            imm = unsigned(op.value.imm)
                            # A call to a noreturn function ends the
                            # fall-through flow: add a separator.
                            if imm in self.functions and self.is_noreturn(imm):
                                if self.is_mips:
                                    prefetch_after_branch = True
                                else:
                                    o._new_line()

                    ad += i.size

                elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                    # Pointer-sized offset data (word/dword/qword offset).
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    off = s.read_int(ad, sz)
                    if off is None:
                        # NOTE(review): `ad` is not advanced here, so a
                        # persistent failed read would loop forever —
                        # confirm read_int can only fail transiently.
                        continue
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._data_prefix(sz)
                    o._add(" ")
                    o._imm(off, sz, True, print_data=False, force_dont_print_data=True)
                    o._new_line()
                    ad += sz

                elif ty == MEM_ASCII:
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    buf = self.binary.get_string(ad, sz)

                    if buf is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))

                        # Split the string into multi lines

                        splitted = buf.split("\n")

                        # j tracks the byte offset within the string so each
                        # continuation line gets the correct address.
                        j = 0
                        for i, st in enumerate(splitted):
                            if i > 0 and len(st) != 0:
                                o._new_line()
                                o.set_line(ad + j)
                                o._address(ad + j)

                            # Wrap long lines in chunks of `bs` characters.
                            ibs = 0
                            bs = 65
                            while ibs < len(st):
                                if ibs > 0:
                                    o._new_line()
                                    o.set_line(ad + j)
                                    o._address(ad + j)

                                blk = st[ibs:ibs + bs]

                                if i < len(splitted) - 1 and ibs + bs >= len(st):
                                    # Last chunk before a newline: show the
                                    # escaped \n and count its byte.
                                    o._string('"' + blk + '\\n"')
                                    j += len(blk) + 1
                                else:
                                    o._string('"' + blk + '"')
                                    j += len(blk)

                                ibs += bs

                    # Trailing NUL terminator.
                    o._add(", 0")
                    o._new_line()
                    ad += sz

                elif ty == MEM_ARRAY:
                    prefetch_after_branch = False
                    o._label_and_address(ad)

                    array_info = self.mem.mm[ad]
                    total_size = array_info[0]
                    entry_type = array_info[2]
                    entry_size = self.mem.get_size_from_type(entry_type)

                    n = int(total_size / entry_size)

                    o.set_line(ad)
                    o._data_prefix(entry_size)

                    k = 0
                    while k < total_size:
                        # Wrap the array dump when the line gets too long.
                        if o.curr_index > 70:
                            o._new_line()
                            o.set_line(ad)
                            o._address(ad)
                            o._data_prefix(entry_size)
                            l += 1

                        val = s.read_int(ad, entry_size)
                        if MEM_WOFFSET <= entry_type <= MEM_QOFFSET:
                            o._add(" ")
                            o._imm(val, entry_size, True,
                                   print_data=False, force_dont_print_data=True)
                        else:
                            o._word(val, entry_size, is_from_array=True)

                        ad += entry_size
                        k += entry_size

                        if k < total_size:
                            o._add(",")

                    o._new_line()

                else:
                    # Unknown / raw data: dump one word of the type's size.
                    prefetch_after_branch = False
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size_from_type(ty)
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._word(s.read_int(ad, sz), sz)
                    o._new_line()
                    ad += sz

                l += 1

            # We fell off the current section: move to the next one.
            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                o._new_line()
                ad = s.start
                if until != -1 and ad >= until:
                    break

            if (l >= lines and until == -1) or (ad >= until and until != -1):
                break

            o.curr_section = s

        if until == ad:
            if self.mem.is_code(ad) and ad in self.xrefs or \
                    s is not None and ad == s.start:
                if not o.is_last_2_line_empty():
                    o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o
Ejemplo n.º 7
0
    def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
        """Explore the control flow starting at `entry` with a DFS.

        Each visited instruction is stored into `inner_code`
        (address -> instruction).  A per-path register context is
        cloned at every fork so stack-pointer tracking stays coherent.

        When `add_if_code` is true and a bad instruction is met, every
        xref added during the walk is rolled back and -1 is returned.
        Otherwise the computed function flags are returned (e.g.
        FUNC_FLAG_NORETURN when no return path was found).
        """
        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        if add_if_code:
            added_xrefs = []

        regsctx = self.arch_analyzer.new_regs_context()
        if regsctx is None:
            # fatal error, but don't quit to let the user save the database
            return -1

        if func_obj is not None:
            frame_size = self.ARCH_UTILS.guess_frame_size(self, entry)
            func_obj[FUNC_FRAME_SIZE] = frame_size
        else:
            # frame_size == -1 disables the stack-pointer bookkeeping below.
            frame_size = -1

        sp_after_push = 0
        last_call = None
        has_ret = False
        # DFS worklist of (register context, address) pairs.
        stack = [(regsctx, entry)]

        while stack:
            (regsctx, ad) = stack.pop()
            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            if self.gctx.debugsp:
                ALL_SP[ad] = self.arch_analyzer.get_sp(regsctx)

            inner_code[ad] = inst

            ##### RETURN #####
            if self.is_ret(inst):
                self.__add_prefetch(inner_code, inst)
                has_ret = True

            ##### UNCONDITIONAL JUMP #####
            elif self.is_uncond_jump(inst):
                self.__add_prefetch(inner_code, inst)
                op = inst.operands[-1]
                jmp_ad = None

                if op.type == self.ARCH_UTILS.OP_IMM:
                    jmp_ad = unsigned(op.value.imm)

                else:
                    is_jmptable = inst.address in self.jmptables

                    # Create a jumptable if necessary
                    if not is_jmptable:
                        if op.type == self.ARCH_UTILS.OP_REG:
                            # Jump on a register: try to resolve its value
                            # from the tracked register context.
                            jmp_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
                            if jmp_ad is None:
                                is_jmptable = self.auto_jump_table(inst, inner_code)

                        elif op.type == self.ARCH_UTILS.OP_MEM:
                            self.arch_analyzer.analyze_operands(
                                    self, regsctx, inst, func_obj, False)
                            is_jmptable = self.auto_jump_table(inst, inner_code)

                    if is_jmptable:
                        table = self.jmptables[inst.address].table
                        # Each table target gets its own cloned context.
                        for n in table:
                            r = self.arch_analyzer.clone_regs_context(regsctx)
                            stack.append((r, n))
                        self.api.add_xrefs_table(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                        continue

                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                    # TODO: assume it has a return
                    if jmp_ad is None:
                        has_ret = True
                        continue

                self.api.add_xref(ad, jmp_ad)
                if self.db.mem.is_func(jmp_ad):
                    # Tail-jump into a known function: this path returns
                    # iff the target does.
                    has_ret = not self.is_noreturn(jmp_ad, entry)
                else:
                    stack.append((regsctx, jmp_ad))
                if add_if_code:
                    added_xrefs.append((ad, jmp_ad))


            ##### CONDITIONAL JUMP #####
            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]
                if op.type == self.ARCH_UTILS.OP_IMM:
                    # Fall-through address: skip over the delay-slot
                    # (prefetch) instruction when there is one.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    # NOTE(review): unsigned() is applied twice here —
                    # presumably harmless but redundant; confirm.
                    nxt_jmp = unsigned(unsigned(op.value.imm))
                    self.api.add_xref(ad, nxt_jmp)

                    if self.db.mem.is_func(direct_nxt):
                        has_ret = not self.is_noreturn(direct_nxt, entry)
                    else:
                        stack.append((regsctx, direct_nxt))

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if self.db.mem.is_func(nxt_jmp):
                        has_ret = not self.is_noreturn(nxt_jmp, entry)
                    else:
                        # The taken branch explores with a cloned context
                        # so the fall-through path is not polluted.
                        newctx = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((newctx, nxt_jmp))
                else:
                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                    # TODO : jump tables for conditional jumps ?

            ##### CALL #####
            elif self.is_call(inst):
                op = inst.operands[-1]
                call_ad = None

                if op.type == self.ARCH_UTILS.OP_IMM:
                    call_ad = unsigned(op.value.imm)
                elif op.type == self.ARCH_UTILS.OP_REG:
                    # FIXME : for MIPS, addresses are loaded in t9 (generally)
                    # then jalr t9 is executed. The problem here is that we
                    # will analyze twice the function. The first time is done
                    # by the function analyze_imm.
                    call_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
                else:
                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)

                if call_ad is not None:
                    last_call = call_ad
                    self.api.add_xref(ad, call_ad)

                    if add_if_code:
                        added_xrefs.append((ad, call_ad))

                    # Recursively analyze the callee first.
                    self.analyze_flow(
                            call_ad,
                            entry_is_func=True,
                            force=False,
                            add_if_code=add_if_code)

                    # Reset the stack pointer to frame_size to handle stdcall.
                    if frame_size != -1:
                        sp_after_push = self.arch_analyzer.get_sp(regsctx)
                        if frame_size != - sp_after_push:
                            self.arch_analyzer.set_sp(regsctx, -frame_size)

                    if self.db.mem.is_func(call_ad):
                        if self.is_noreturn(call_ad, entry):
                            # No fall-through after a noreturn call.
                            self.__add_prefetch(inner_code, inst)
                            continue

                nxt = inst.address + inst.size
                stack.append((regsctx, nxt))

            ##### OTHERS #####
            else:
                if frame_size != -1:
                    sp_before = self.arch_analyzer.get_sp(regsctx)

                self.arch_analyzer.analyze_operands(
                        self, regsctx, inst, func_obj, False)

                # Restore the stack pointer to sp_after_push to handle cdecl.
                if frame_size != -1:
                    curr_sp = self.arch_analyzer.get_sp(regsctx)
                    if curr_sp != sp_before and \
                            self.handle_cdecl(frame_size, sp_after_push, curr_sp):

                        new_sp = sp_after_push - sp_before - curr_sp
                        self.arch_analyzer.set_sp(regsctx, new_sp)

                        # Mark the last callee as cdecl: the caller is the
                        # one cleaning up the stack.
                        if last_call is not None and self.db.mem.is_func(last_call):
                            self.functions[last_call][FUNC_FLAGS] |= FUNC_FLAG_CDECL

                        if self.gctx.debugsp:
                            ALL_SP[ad] = sp_after_push

                        sp_after_push = 0

                nxt = inst.address + inst.size
                # Stop the linear flow when it runs into another function.
                if nxt not in self.functions:
                    stack.append((regsctx, nxt))

        # Remove all xrefs, this is not a correct flow
        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                if isinstance(to_ad, list):
                    self.api.rm_xrefs_table(from_ad, to_ad)
                else:
                    self.api.rm_xref(from_ad, to_ad)
            return -1

        # Set function flags
        flags = self.import_flags(entry)
        if flags == 0 and not has_ret:
            flags = FUNC_FLAG_NORETURN

        return flags
Ejemplo n.º 8
0
    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """Disassemble and render memory starting at ctx.entry.

        Simpler variant: only code, single-type offsets (MEM_OFFSET),
        ASCII strings and raw words are distinguished.  Emits at most
        `lines` output lines when `until == -1`, otherwise stops at the
        address `until` (visual mode).  Returns an Output object, or
        None when the entry lies before the first section.
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ARCH_UTILS = ARCH.utils

        ad = ctx.entry
        s = self.binary.get_section(ad)

        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.section_prefix = True
        o.curr_section = s
        o.mode_dump = True
        # l counts emitted output lines (the `lines` budget).
        l = 0
        api = ctx.gctx.api

        # For mips: after a jump we add a newline, but for mips we should
        # add this newline after the prefetch instruction.
        prefetch_after_branch = False

        while 1:
            # Print a section banner each time we land on a section start.
            if ad == s.start:
                if not o.is_last_2_line_empty():
                    o._new_line()
                o._dash()
                o._section(s.name)
                o._add("  0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()

            while ((l < lines and until == -1) or (ad < until and until != -1)) \
                    and ad <= s.end:

                ty = self.mem.get_type(ad)

                # A PE import should not be displayed as a subroutine
                if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                        and self.mem.is_code(ad):

                    is_func = ad in self.functions

                    if is_func:
                        # Header banner for a known function entry.
                        if not o.is_last_2_line_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()

                    i = self.lazy_disasm(ad, s.start)

                    # Blank line before a cross-referenced address that is
                    # not a function entry.
                    if not is_func and ad in self.xrefs and \
                            not o.is_last_2_line_empty():
                        o._new_line()

                    o._asm_inst(i)

                    is_end = ad in self.end_functions

                    # mips
                    if prefetch_after_branch:
                        prefetch_after_branch = False
                        if not is_end:
                            o._new_line()

                    if is_end:
                        for fad in self.end_functions[ad]:
                            sy = api.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()

                    elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                        # On mips, delay the newline until after the
                        # prefetch (delay-slot) instruction.
                        if self.is_mips:
                            prefetch_after_branch = True
                        else:
                            o._new_line()

                    elif ARCH_UTILS.is_call(i):
                        op = i.operands[0]
                        if op.type == self.capstone.CS_OP_IMM:
                            imm = unsigned(op.value.imm)
                            # A call to a noreturn function ends the
                            # fall-through flow: add a separator.
                            if imm in self.functions and self.is_noreturn(imm):
                                if self.is_mips:
                                    prefetch_after_branch = True
                                else:
                                    o._new_line()

                    ad += i.size

                elif ty == MEM_OFFSET:
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    off = s.read_int(ad, sz)
                    if off is None:
                        # NOTE(review): `ad` is not advanced here, so a
                        # persistent failed read would loop forever —
                        # confirm read_int can only fail transiently.
                        continue
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._data_prefix(sz)
                    o._add(" ")
                    o._imm(off, sz, True, print_data=False, force_dont_print_data=True)
                    o._new_line()
                    ad += sz

                elif ty == MEM_ASCII:
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    buf = self.binary.get_string(ad, sz)
                    if buf is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))
                        o._string('"' + buf + '"')
                    # Trailing NUL terminator.
                    o._add(", 0")
                    o._new_line()
                    ad += sz

                else:
                    # Unknown / raw data: dump one word of the type's size.
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size_from_type(ty)
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._word(s.read_int(ad, sz), sz)
                    o._new_line()
                    ad += sz

                l += 1

            # We fell off the current section: move to the next one.
            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                o._new_line()
                ad = s.start
                if until != -1 and ad >= until:
                    break

            if (l >= lines and until == -1) or (ad >= until and until != -1):
                break

            o.curr_section = s

        if until == ad:
            # NOTE(review): `s` can be None here (break after the last
            # section), which would make `s.start` raise — confirm the
            # caller guarantees `until` is inside a section.
            if self.mem.is_code(ad) and ad in self.xrefs or ad == s.start:
                if not o.is_last_2_line_empty():
                    o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o
Ejemplo n.º 9
0
	def _getIMMString(self, imm, op_size, hexa, section=None, print_data=True, force_dont_print_data=False):
		"""Build a printable representation of the immediate `imm`.

		Returns a TextOp (plain text) or a StrOp (string data) token.

		NOTE(review): this looks like work-in-progress/debug code —
		`label_printed` is hard-coded to the placeholder "LL" (the real
		self._label call is commented out), "A1" stands in for
		self._address, and several branches below append to `res` or
		call self._string/_add without that text ever reaching the
		returned token.  Confirm the intended behavior before relying
		on it.
		"""
		# Forced: immediates are always rendered in hexadecimal here.
		hexa = True
		imm = unsigned(imm)
		# Placeholder — always truthy, so the label branch always runs.
		label_printed = "LL" #self._label(imm, print_colon=False)

		res = ""
		if label_printed:
			ty = self.ctx.gctx.dis.mem.get_type(imm)
			# ty == -1 : from the terminal (with -x) there are no xrefs if
			# the file was loaded without a database.
			if imm in self.ctx.gctx.dis.xrefs and ty != MEM_UNK and \
					ty != MEM_ASCII or ty == -1:
				return TextOp(str(imm))

			if ty == MEM_ASCII:
				print_data = True
				force_dont_print_data = False

		if section is None:
			section = self.ctx.gctx.dis.binary.get_section(imm)

		# A section starting at 0 is not considered a real section.
		if section is not None and section.start == 0:
			section = None

		# For a raw file, if the raw base is 0 the immediate is considered
		# as an address only if it's in the symbols list.
		# NOTE(review): uses self.gctx here but self.ctx.gctx everywhere
		# else — verify both refer to the same global context.
		raw_base_zero = self.ctx.gctx.dis.binary.type == T_BIN_RAW and self.gctx.raw_base == 0

		if section is not None and not raw_base_zero:
			if not label_printed:
				res += "A1" #self._address(imm, print_colon=False, notprefix=True)

			if not force_dont_print_data and print_data:
				s = self.ctx.gctx.dis.binary.get_string(imm, self.ctx.gctx.max_data_size)
				if s is not None:
					# NOTE(review): res is built with quotes but the
					# returned StrOp wraps the raw string instead.
					res += " "
					res += '"' + s + '"'
					return StrOp(s)

			return TextOp(res)

		if label_printed:
			return TextOp(res)

		# Unreachable in practice while label_printed is hard-coded truthy.
		if op_size == 1:
			self._string("'%s'" % get_char(imm))
		elif hexa:
			self._add(hex(imm))
		else:
			if op_size == 4:
				self._add(str(c_int(imm).value))
			elif op_size == 2:
				self._add(str(c_short(imm).value))
			else:
				self._add(str(c_long(imm).value))

			if imm > 0:
				if op_size == 4:
					packed = struct.pack("<L", imm)
				elif op_size == 8:
					packed = struct.pack("<Q", imm)
				else:
					return TextOp(res)
				if set(packed).issubset(BYTES_PRINTABLE_SET):
					self._string(" \"" + "".join(map(chr, packed)) + "\"")

		return TextOp(res)
Ejemplo n.º 10
0
    def _imm(self, imm, op_size, hexa, section=None, print_data=True,
             force_dont_print_data=False, is_from_jump=False):
        """Print the immediate `imm` (`op_size` bytes wide).

        Depending on whether the value resolves to a label, falls inside
        a binary section, or is a plain constant, it is rendered as a
        label, an address (optionally followed by the string it points
        to), a character literal, a hex number, or a signed decimal.
        """
        # Capstone-style output forces hexadecimal immediates.
        if self.gctx.capstone_string != 0:
            hexa = True

        if hexa:
            imm = unsigned(imm)

        had_label = self._label(imm, print_colon=False)

        if had_label:
            mem_type = self._dis.mem.get_type(imm)

            # A head entry whose head address is a string counts as one.
            if mem_type == MEM_HEAD and self._dis.mem.get_type(
                    self._dis.mem.get_head_addr(imm)) == MEM_ASCII:
                mem_type = MEM_ASCII

            # ty == -1 : from the terminal (with -x) there are no xrefs if
            # the file was loaded without a database.
            if mem_type == -1 or (imm in self._dis.xrefs and
                    mem_type not in (MEM_UNK, MEM_ASCII)):
                return

            if mem_type == MEM_ASCII:
                print_data = True
                force_dont_print_data = False

        if section is None:
            section = self._binary.get_section(imm)

        # A section starting at 0 is not considered a real section.
        if section is not None and section.start == 0:
            section = None

        # For a raw file, if the raw base is 0 the immediate is considered
        # as an address only if it's in the symbols list.
        raw_base_zero = self._binary.type == T_BIN_RAW and self.gctx.raw_base == 0

        if section is not None and not raw_base_zero:
            if not had_label:
                self._address(imm, print_colon=False, notprefix=True)

            if print_data and not force_dont_print_data:
                text = self._binary.get_string(imm, self.gctx.max_data_size)
                if text is not None:
                    text = text.replace("\n", "\\n")
                    self._add(" ")
                    self._string('"' + text + '"')

            return

        if had_label:
            return

        if op_size == 1:
            # One byte: render it as a character literal.
            self._string("'\\n'" if imm == 10 else "'%s'" % get_char(imm))
        elif hexa:
            # Jump targets that did not resolve are flagged as errors.
            if is_from_jump:
                self._error(hex(imm))
            else:
                self._add(hex(imm))
        else:
            # Signed decimal, truncated to the operand width.
            ctype = {4: c_int, 2: c_short}.get(op_size, c_long)
            self._add(str(ctype(imm).value))

            # When every byte of the value is printable, also display
            # the value interpreted as an ASCII string.
            if imm > 0 and op_size in (4, 8):
                raw = struct.pack("<L" if op_size == 4 else "<Q", imm)
                if set(raw).issubset(BYTES_PRINTABLE_SET):
                    self._string(" \"" + "".join(map(chr, raw)) + "\"")

        return
Ejemplo n.º 11
0
    def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
        """Explore the control flow starting at `entry` with a DFS.

        Each visited instruction is stored into `inner_code`
        (address -> instruction).  A register context tracks register
        values along the walk (cloned only at conditional-jump forks).

        Returns -1 when `entry` is outside any section or when a bad
        instruction forces an xref rollback; 0 on a fatal context
        allocation failure; otherwise the computed function flags
        (e.g. FUNC_FLAG_NORETURN when no return path was found).
        """
        if self.dis.binary.get_section(entry) is None:
            return -1

        has_ret = False

        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        if add_if_code:
            added_xrefs = []

        regsctx = self.arch_analyzer.new_regs_context()
        if regsctx is None:
            # fatal error, but don't quit to let the user save the database
            return 0

        # FIXME : this is a hack for the cdecl calling convention
        # if the stack pointer move after a call, this is probably a cdecl
        # call, so we will ignore the add instruction.
        one_call_called = False

        # DFS worklist of (register context, address) pairs.
        stack = [(regsctx, entry)]

        while stack:
            (regsctx, ad) = stack.pop()
            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            inner_code[ad] = inst

            # Update the register context from the instruction operands.
            self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, one_call_called)

            if self.is_ret(inst):
                self.__add_prefetch(inner_code, inst)
                has_ret = True

            elif self.is_uncond_jump(inst):
                self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]

                if op.type == self.ARCH_UTILS.OP_IMM:
                    nxt = unsigned(op.value.imm)
                    self.api.add_xref(ad, nxt)
                    if self.db.mem.is_func(nxt):
                        # Tail-jump into a known function: this path
                        # returns iff the target does.
                        has_ret = not self.is_noreturn(nxt, entry)
                    else:
                        stack.append((regsctx, nxt))
                    if add_if_code:
                        added_xrefs.append((ad, nxt))
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        # TODO : dupplicate regsctx ??
                        for n in table:
                            stack.append((regsctx, n))
                        # NOTE(review): passes the whole table to
                        # add_xref — other versions use add_xrefs_table
                        # here; confirm add_xref accepts a list.
                        self.api.add_xref(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                    else:
                        # TODO
                        # This is a register or a memory access
                        # we can't say if the function really returns
                        has_ret = True

            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]
                if op.type == self.ARCH_UTILS.OP_IMM:
                    # Fall-through address: skip over the delay-slot
                    # (prefetch) instruction when there is one.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    # NOTE(review): unsigned() is applied twice here —
                    # presumably harmless but redundant; confirm.
                    nxt_jmp = unsigned(unsigned(op.value.imm))
                    self.api.add_xref(ad, nxt_jmp)
                    stack.append((regsctx, direct_nxt))

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if self.db.mem.is_func(nxt_jmp):
                        has_ret = not self.is_noreturn(nxt_jmp, entry)
                    else:
                        # The taken branch explores with a cloned context
                        # so the fall-through path is not polluted.
                        newctx = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((newctx, nxt_jmp))

            elif self.is_call(inst):
                one_call_called = True
                op = inst.operands[-1]
                value = None

                if op.type == self.ARCH_UTILS.OP_IMM:
                    value = unsigned(op.value.imm)
                elif op.type == self.ARCH_UTILS.OP_REG:
                    # FIXME : for MIPS, addresses are loaded in t9 (generally)
                    # then jalr t9 is executed. The problem here is that we
                    # will analyze twice the function. The first time is done
                    # by the function analyze_imm.
                    value = self.arch_analyzer.reg_value(regsctx, op.value.reg)

                if value is not None:
                    self.api.add_xref(ad, value)

                    if add_if_code:
                        added_xrefs.append((ad, value))

                    # Analyze the callee first, if not already a function.
                    if not self.db.mem.is_func(value):
                        self.analyze_flow(value, True, False, add_if_code)

                    if self.db.mem.is_func(value) and self.is_noreturn(value, entry):
                        # No fall-through after a noreturn call.
                        self.__add_prefetch(inner_code, inst)
                        continue

                nxt = inst.address + inst.size
                stack.append((regsctx, nxt))

            else:
                # Plain instruction: continue with the next one.
                nxt = inst.address + inst.size
                stack.append((regsctx, nxt))

        # Remove all xrefs: this was not a correct flow.
        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                self.api.rm_xrefs(from_ad, to_ad)
            return -1

        # for ELF
        if entry in self.dis.binary.imports:
            flags = self.import_flags(entry)
        elif has_ret:
            flags = 0
        else:
            flags = FUNC_FLAG_NORETURN

        return flags
Ejemplo n.º 12
0
    def __sub_analyze_flow(self, entry, inner_code, add_if_code):
        """Explore the control flow starting at `entry`.

        A DFS worklist of addresses is used; each disassembled instruction
        is stored in `inner_code` (address -> instruction) and the xrefs it
        creates are registered through the api.  When `add_if_code` is set,
        added xrefs are recorded so they can be rolled back if a bad
        instruction is reached (i.e. `entry` was not really code).

        Returns -1 on error, otherwise the function flags: import flags for
        an imported entry, 0 if a return path was found, FUNC_FLAG_NORETURN
        otherwise.
        """
        if self.dis.binary.get_section(entry) is None:
            return -1

        stack = [entry]
        has_ret = False

        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        if add_if_code:
            added_xrefs = []

        while stack:
            ad = stack.pop()
            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            inner_code[ad] = inst

            if self.is_ret(inst):
                self.__add_prefetch(inner_code, inst)
                has_ret = True

            elif self.is_uncond_jump(inst):
                self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]

                if op.type == self.CS_OP_IMM:
                    nxt = unsigned(op.value.imm)
                    self.api.add_xref(ad, nxt)
                    if self.db.mem.is_func(nxt):
                        has_ret = not self.is_noreturn(nxt, entry)
                    else:
                        stack.append(nxt)
                    if add_if_code:
                        added_xrefs.append((ad, nxt))
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        self.api.add_xref(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                    else:
                        # TODO
                        # This is a register or a memory access
                        # we can't say if the function really returns
                        has_ret = True

            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]
                if op.type == self.CS_OP_IMM:
                    # The fall-through address comes after the prefetched
                    # (delay-slot) instruction when there is one.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    # Fixed: the immediate was wrapped in unsigned() twice.
                    nxt_jmp = unsigned(op.value.imm)
                    self.api.add_xref(ad, nxt_jmp)
                    stack.append(direct_nxt)

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if self.db.mem.is_func(nxt_jmp):
                        has_ret = not self.is_noreturn(nxt_jmp, entry)
                    else:
                        stack.append(nxt_jmp)

            elif self.is_call(inst):
                op = inst.operands[-1]
                if op.type == self.CS_OP_IMM:
                    imm = unsigned(op.value.imm)
                    self.api.add_xref(ad, imm)

                    if add_if_code:
                        added_xrefs.append((ad, imm))

                    if not self.db.mem.is_func(imm):
                        self.analyze_flow(imm, True, False, add_if_code)

                    # A call to a noreturn function ends this path.
                    if self.db.mem.is_func(imm) and self.is_noreturn(imm, entry):
                        self.__add_prefetch(inner_code, inst)
                        continue

                nxt = inst.address + inst.size
                stack.append(nxt)

            else:
                nxt = inst.address + inst.size
                stack.append(nxt)

        # Remove all xrefs: this was not a correct flow.
        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                self.api.rm_xrefs(from_ad, to_ad)
            return -1

        # for ELF
        if entry in self.dis.binary.imports:
            flags = self.import_flags(entry)
        elif has_ret:
            flags = 0
        else:
            flags = FUNC_FLAG_NORETURN

        return flags
Ejemplo n.º 13
0
    def analyze_operands(self, i, func_obj):
        """Analyze the operands of instruction `i` to detect data xrefs.

        Immediate values and memory displacements that point inside a
        binary section are registered as cross-references.  x86 ebp/rbp
        based accesses are treated as stack variables and stored in
        `func_obj[FUNC_VARS]` instead.  Newly discovered addresses whose
        content looks like code are queued for analysis through self.msg.
        """
        b = self.dis.binary

        for op in i.operands:
            if op.type == self.CS_OP_IMM:
                val = unsigned(op.value.imm)

            elif op.type == self.CS_OP_MEM and op.mem.disp != 0:

                if self.dis.is_x86:
                    if op.mem.segment != 0:
                        continue
                    if op.mem.index == 0:
                        # Compute the rip register
                        if op.mem.base == self.X86_REG_EIP or \
                            op.mem.base == self.X86_REG_RIP:
                            val = i.address + i.size + unsigned(op.mem.disp)

                        # Check if it's a stack variable
                        elif (op.mem.base == self.X86_REG_EBP or \
                              op.mem.base == self.X86_REG_RBP):
                            if func_obj is not None:
                                ty = self.db.mem.find_type(op.size)
                                func_obj[FUNC_VARS][op.mem.disp] = [ty, None]
                            # Continue the loop !!
                            continue
                        else:
                            val = unsigned(op.mem.disp)
                    else:
                        val = unsigned(op.mem.disp)

                # TODO: stack variables for arm/mips

                elif self.dis.is_arm:
                    if op.mem.index == 0 and op.mem.base == self.ARM_REG_PC:
                        val = i.address + i.size * 2 + op.mem.disp
                    else:
                        val = op.mem.disp

                elif self.dis.is_mips:
                    if op.mem.base == self.MIPS_REG_GP:
                        if self.dis.mips_gp == -1:
                            continue
                        val = op.mem.disp + self.dis.mips_gp
                    else:
                        val = op.mem.disp

                else:
                    # Unknown architecture: without this guard `val` would
                    # be referenced unbound below (NameError).
                    continue
            else:
                continue

            s = b.get_section(val)
            if s is None or s.start == 0:
                continue

            self.api.add_xref(i.address, val)

            if not self.db.mem.exists(val):
                sz = op.size if self.dis.is_x86 else self.dis.wordsize
                deref = s.read_int(val, sz)

                # If (*val) is an address
                if deref is not None and b.is_address(deref):
                    ty = MEM_OFFSET
                    self.api.add_xref(val, deref)

                    if not self.db.mem.exists(deref):
                        self.db.mem.add(deref, 1, MEM_UNK)

                        # Do an analysis on this value.
                        if deref not in self.pending and \
                                deref not in self.pending_not_curr and \
                                self.first_inst_are_code(deref):

                            self.pending_not_curr.add(deref)
                            self.msg.put(
                                (deref, self.has_prolog(deref), False, True, None))
                else:
                    # Check if this is an address to a string
                    sz = b.is_string(val)
                    if sz != 0:
                        ty = MEM_ASCII
                    else:
                        sz = op.size if self.dis.is_x86 else self.dis.wordsize
                        if op.type == self.CS_OP_MEM:
                            ty = self.db.mem.find_type(sz)
                        else:
                            ty = MEM_UNK

                self.db.mem.add(val, sz, ty)

                if ty == MEM_UNK:
                    # Do an analysis on this value, if this is not code
                    # nothing will be done.
                    # jumps and calls are already analyzed in analyze_flow.
                    if val not in self.pending and \
                            not (self.is_jump(i) or self.is_call(i)) and \
                            val not in self.pending_not_curr and \
                            self.first_inst_are_code(val):

                        self.pending_not_curr.add(val)
                        self.msg.put(
                            (val, self.has_prolog(val), False, True, None))
Ejemplo n.º 14
0
    def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
        """Follow the control flow starting at `entry`, tracking registers.

        A worklist of (register-context, address) pairs is used so register
        values can be propagated along each path and used to resolve
        indirect jumps and calls.  Disassembled instructions are stored in
        `inner_code` (address -> instruction); xrefs are registered through
        the api.  When `add_if_code` is set, every added xref is recorded
        and rolled back if a bad instruction is found.

        Returns -1 on a fatal error or rollback, otherwise the function
        flags (import flags, 0, or FUNC_FLAG_NORETURN).
        """
        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        if add_if_code:
            added_xrefs = []

        regsctx = self.arch_analyzer.new_regs_context()
        if regsctx is None:
            # fatal error, but don't quit to let the user save the database
            return -1

        if func_obj is not None:
            frame_size = self.ARCH_UTILS.guess_frame_size(self, entry)
            func_obj[FUNC_FRAME_SIZE] = frame_size
        else:
            # No function object: stack-frame tracking is disabled.
            frame_size = -1

        # sp_after_push / last_call are used below to detect cdecl callers.
        sp_after_push = 0
        last_call = None
        has_ret = False
        stack = [(regsctx, entry)]

        while stack:
            (regsctx, ad) = stack.pop()
            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            if self.gctx.debugsp:
                ALL_SP[ad] = self.arch_analyzer.get_sp(regsctx)

            inner_code[ad] = inst

            ##### RETURN #####
            if self.is_ret(inst):
                self.__add_prefetch(inner_code, inst)
                has_ret = True

            ##### UNCONDITIONAL JUMP #####
            elif self.is_uncond_jump(inst):
                self.__add_prefetch(inner_code, inst)
                op = inst.operands[-1]
                jmp_ad = None

                if op.type == self.ARCH_UTILS.OP_IMM:
                    jmp_ad = unsigned(op.value.imm)

                else:
                    is_jmptable = inst.address in self.jmptables

                    # Create a jumptable if necessary
                    if not is_jmptable:
                        if op.type == self.ARCH_UTILS.OP_REG:
                            # Try to resolve the target from the tracked
                            # register value.
                            jmp_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
                            if jmp_ad is None:
                                is_jmptable = self.auto_jump_table(inst, inner_code)

                        elif op.type == self.ARCH_UTILS.OP_MEM:
                            self.arch_analyzer.analyze_operands(
                                    self, regsctx, inst, func_obj, False)
                            is_jmptable = self.auto_jump_table(inst, inner_code)

                    if is_jmptable:
                        # Each table entry is explored with its own cloned
                        # register context.
                        table = self.jmptables[inst.address].table
                        for n in table:
                            r = self.arch_analyzer.clone_regs_context(regsctx)
                            stack.append((r, n))
                        self.api.add_xrefs_table(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                        continue

                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                    # TODO: assume it has a return
                    if jmp_ad is None:
                        has_ret = True
                        continue

                self.api.add_xref(ad, jmp_ad)
                if self.db.mem.is_func(jmp_ad):
                    has_ret = not self.is_noreturn(jmp_ad, entry)
                else:
                    stack.append((regsctx, jmp_ad))
                if add_if_code:
                    added_xrefs.append((ad, jmp_ad))


            ##### CONDITIONAL JUMP #####
            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]
                if op.type == self.ARCH_UTILS.OP_IMM:
                    # The fall-through address comes after the prefetched
                    # (delay-slot) instruction when there is one.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = unsigned(unsigned(op.value.imm))
                    self.api.add_xref(ad, nxt_jmp)

                    if self.db.mem.is_func(direct_nxt):
                        has_ret = not self.is_noreturn(direct_nxt, entry)
                    else:
                        stack.append((regsctx, direct_nxt))

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if self.db.mem.is_func(nxt_jmp):
                        has_ret = not self.is_noreturn(nxt_jmp, entry)
                    else:
                        # The taken branch gets a cloned context so the two
                        # paths don't share register state.
                        newctx = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((newctx, nxt_jmp))
                else:
                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                    # TODO : jump tables for conditional jumps ?

            ##### CALL #####
            elif self.is_call(inst):
                op = inst.operands[-1]
                call_ad = None

                if op.type == self.ARCH_UTILS.OP_IMM:
                    call_ad = unsigned(op.value.imm)
                elif op.type == self.ARCH_UTILS.OP_REG:
                    # FIXME : for MIPS, addresses are loaded in t9 (generally)
                    # then jalr t9 is executed. The problem here is that we
                    # will analyze twice the function. The first time is done
                    # by the function analyze_imm.
                    call_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
                else:
                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)

                if call_ad is not None:
                    last_call = call_ad
                    self.api.add_xref(ad, call_ad)

                    if add_if_code:
                        added_xrefs.append((ad, call_ad))

                    self.analyze_flow(
                            call_ad,
                            entry_is_func=True,
                            force=False,
                            add_if_code=add_if_code)

                    # Reset the stack pointer to frame_size to handle stdcall.
                    if frame_size != -1:
                        sp_after_push = self.arch_analyzer.get_sp(regsctx)
                        if frame_size != - sp_after_push:
                            self.arch_analyzer.set_sp(regsctx, -frame_size)

                    if self.db.mem.is_func(call_ad):
                        if self.is_noreturn(call_ad, entry):
                            # Noreturn callee: stop exploring this path.
                            self.__add_prefetch(inner_code, inst)
                            continue

                nxt = inst.address + inst.size
                stack.append((regsctx, nxt))

            ##### OTHERS #####
            else:
                if frame_size != -1:
                    sp_before = self.arch_analyzer.get_sp(regsctx)

                self.arch_analyzer.analyze_operands(
                        self, regsctx, inst, func_obj, False)

                # Restore the stack pointer to sp_after_push to handle cdecl.
                if frame_size != -1:
                    curr_sp = self.arch_analyzer.get_sp(regsctx)
                    if curr_sp != sp_before and \
                            self.handle_cdecl(frame_size, sp_after_push, curr_sp):

                        # NOTE(review): this mixes an absolute sp value and a
                        # delta (sp_after_push - sp_before - curr_sp) — confirm
                        # this is the intended adjustment for the cdecl case.
                        new_sp = sp_after_push - sp_before - curr_sp
                        self.arch_analyzer.set_sp(regsctx, new_sp)

                        if last_call is not None and self.db.mem.is_func(last_call):
                            self.functions[last_call][FUNC_FLAGS] |= FUNC_FLAG_CDECL

                        if self.gctx.debugsp:
                            ALL_SP[ad] = sp_after_push

                        sp_after_push = 0

                nxt = inst.address + inst.size
                if nxt not in self.functions:
                    stack.append((regsctx, nxt))

        # Remove all xrefs, this is not a correct flow
        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                if isinstance(to_ad, list):
                    self.api.rm_xrefs_table(from_ad, to_ad)
                else:
                    self.api.rm_xref(from_ad, to_ad)
            return -1

        # Set function flags
        flags = self.import_flags(entry)
        if flags == 0 and not has_ret:
            flags = FUNC_FLAG_NORETURN

        return flags
Ejemplo n.º 15
0
    def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
        """Follow the control flow from `entry` and fill in function metadata.

        Like the register-tracking flow analysis, but additionally computes
        the function flags (stdcall detection via `ret imm`, stack-analysis
        errors, noreturn), the frame size and the number of restored
        argument bytes, all written back into `func_obj` on success.

        Returns False on a fatal error or xref rollback, True otherwise.
        """
        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        if add_if_code:
            added_xrefs = []

        regsctx = self.arch_analyzer.new_regs_context()
        if regsctx is None:
            # fatal error, but don't quit to let the user save the database
            return False

        flags = 0
        # NOTE(review): stack_err is assigned but never read in this method —
        # possibly a leftover.
        stack_err = False
        args_restore = 0
        if func_obj is not None:
            frame_size = func_obj[FUNC_FRAME_SIZE]
            if frame_size == -1:
                frame_size = self.ARCH_UTILS.guess_frame_size(self, entry)
                # used in arch/*/analyzer.c
                func_obj[FUNC_FRAME_SIZE] = frame_size
        else:
            frame_size = -1

        ret_found = False
        stack = [(regsctx, entry)]

        while stack:
            (regsctx, ad) = stack.pop()

            # Don't disassemble bytes already known to be data.
            if self.db.mem.is_data(ad):
                continue

            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            if self.gctx.debugsp:
                ALL_SP[ad] = self.arch_analyzer.get_sp(regsctx)

            inner_code[ad] = inst

            ##### RETURN #####
            if self.is_ret(inst):
                self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                ret_found = True

                # x86 `ret imm` pops imm bytes of arguments -> stdcall.
                if self.dis.is_x86 and len(inst.operands) == 1:
                    args_restore = inst.operands[0].value.imm
                    flags |= FUNC_FLAG_STDCALL

                # A non-zero sp at return means the stack tracking diverged.
                if self.arch_analyzer.get_sp(regsctx) != 0:
                    flags |= FUNC_FLAG_ERR_STACK_ANALYSIS

            ##### UNCONDITIONAL JUMP #####
            elif self.is_uncond_jump(inst):
                self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                op = inst.operands[-1]
                jmp_ad = None

                if op.type == self.ARCH_UTILS.OP_IMM:
                    jmp_ad = unsigned(op.value.imm)

                else:
                    is_jmptable = inst.address in self.jmptables

                    # Create a jumptable if necessary
                    if not is_jmptable:
                        if op.type == self.ARCH_UTILS.OP_REG:
                            jmp_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
                            if jmp_ad is None:
                                is_jmptable = self.auto_jump_table(inst, inner_code)

                        elif op.type == self.ARCH_UTILS.OP_MEM:
                            self.arch_analyzer.analyze_operands(
                                    self, regsctx, inst, func_obj, False)
                            is_jmptable = self.auto_jump_table(inst, inner_code)

                    if is_jmptable:
                        # Each table entry gets its own cloned register
                        # context.
                        table = self.jmptables[inst.address].table
                        for n in table:
                            r = self.arch_analyzer.clone_regs_context(regsctx)
                            stack.append((r, n))
                        self.api.add_xrefs_table(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                        continue

                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                    # TODO: assume there is return
                    if jmp_ad is None:
                        if entry in self.db.imports:
                            # NOTE(review): this ORs the NORETURN flag value
                            # into ret_found, making ret_found truthy when the
                            # import is flagged noreturn — that looks inverted;
                            # confirm the intended semantics.
                            ret_found |= self.db.imports[entry] & FUNC_FLAG_NORETURN
                        else:
                            ret_found = True
                        continue

                self.api.add_xref(ad, jmp_ad)
                if self.db.mem.is_func(jmp_ad):
                    # Tail jump into a known function: inherit its metadata.
                    ret_found |= not self.is_func_noreturn(jmp_ad, entry)
                    fo = self.functions[jmp_ad]
                    flags = fo[FUNC_FLAGS]
                    frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                    args_restore = fo[FUNC_ARGS_RESTORE]
                else:
                    stack.append((regsctx, jmp_ad))
                if add_if_code:
                    added_xrefs.append((ad, jmp_ad))

            ##### CONDITIONAL JUMP #####
            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(regsctx, inst, func_obj, inner_code)

                op = inst.operands[-1]
                if op.type == self.ARCH_UTILS.OP_IMM:
                    # The fall-through address comes after the prefetched
                    # (delay-slot) instruction when there is one.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = unsigned(unsigned(op.value.imm))
                    self.api.add_xref(ad, nxt_jmp)

                    if self.db.mem.is_func(direct_nxt):
                        ret_found |= not self.is_func_noreturn(direct_nxt, entry)
                        fo = self.functions[direct_nxt]
                        flags = fo[FUNC_FLAGS]
                        frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                        args_restore = fo[FUNC_ARGS_RESTORE]
                    else:
                        stack.append((regsctx, direct_nxt))

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if self.db.mem.is_func(nxt_jmp):
                        ret_found |= not self.is_func_noreturn(nxt_jmp, entry)
                    else:
                        # The taken branch gets a cloned context so the two
                        # paths don't share register state.
                        newctx = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((newctx, nxt_jmp))
                else:
                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                    # TODO : jump tables for conditional jumps ?

            ##### CALL #####
            elif self.is_call(inst):
                op = inst.operands[-1]
                call_ad = None
                sp_before = self.arch_analyzer.get_sp(regsctx)

                if op.type == self.ARCH_UTILS.OP_IMM:
                    call_ad = unsigned(op.value.imm)
                elif op.type == self.ARCH_UTILS.OP_REG:
                    # FIXME : for MIPS, addresses are loaded in t9 (generally)
                    # then jalr t9 is executed. The problem here is that we
                    # will analyze twice the function. The first time is done
                    # by the function analyze_imm.
                    call_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
                else:
                    self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                    # Indirect call through memory to a noreturn function
                    # also ends this path.
                    if self.db.mem.is_func(op.mem.disp) and \
                            self.is_func_noreturn(op.mem.disp, entry):
                        self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                        continue

                if call_ad is not None:
                    self.api.add_xref(ad, call_ad)

                    if add_if_code:
                        added_xrefs.append((ad, call_ad))

                    self.analyze_flow(
                            call_ad,
                            entry_is_func=True,
                            force=False,
                            add_if_code=add_if_code)

                    # TODO: if the address was already in the pending list
                    # we don't have a computed args size
                    # Reset the stack pointer to frame_size to handle stdcall.
                    if frame_size != -1 and call_ad in self.functions:
                        fo = self.functions[call_ad]
                        if fo is not None:
                            n = fo[FUNC_ARGS_RESTORE]
                            if n:
                                self.arch_analyzer.set_sp(regsctx, sp_before + n)

                    if self.db.mem.is_func(call_ad) and \
                            self.is_func_noreturn(call_ad, entry):
                        self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                        continue

                # It seems it doesn't matter for the prefetched instruction
                nxt = inst.address + inst.size
                stack.append((regsctx, nxt))

            ##### OTHERS #####
            else:
                self.arch_analyzer.analyze_operands(
                        self, regsctx, inst, func_obj, False)

                nxt = inst.address + inst.size
                if nxt not in self.functions:
                    stack.append((regsctx, nxt))

        # Remove all xrefs, this is not a correct flow
        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                if isinstance(to_ad, list):
                    self.api.rm_xrefs_table(from_ad, to_ad)
                else:
                    self.api.rm_xref(from_ad, to_ad)
            return False

        if func_obj is not None:
            # Imports keep their declared noreturn flag; otherwise rely on
            # whether a return path was found.
            if entry in self.db.imports:
                if self.db.imports[entry] & FUNC_FLAG_NORETURN:
                    flags |= FUNC_FLAG_NORETURN
            elif not ret_found:
                flags |= FUNC_FLAG_NORETURN

            func_obj[FUNC_FLAGS] = flags
            func_obj[FUNC_FRAME_SIZE] = frame_size
            func_obj[FUNC_ARGS_RESTORE] = args_restore

        return True