예제 #1
0
    def _imm(self,
             imm,
             op_size,
             hexa,
             section=None,
             print_data=True,
             force_dont_print_data=False):
        """
        Print an immediate value as a label, an address, a character or a
        plain number.

        imm      -- the immediate value.
        op_size  -- operand size in bytes; 1 prints the value as a character,
                    4 and 8 enable the "printable bytes" string rendering.
        hexa     -- print plain numbers in hexadecimal (forced when
                    `gctx.capstone_string` is not 0).
        section  -- section containing `imm`; looked up in the binary when
                    None.
        print_data / force_dont_print_data -- control whether the string
                    found at the address is appended after it.

        Returns a boolean. Judging from the comment kept at the end of this
        function, True presumably tells the caller that the hexadecimal form
        may still be worth showing in a comment -- confirm against callers.
        """
        # With capstone strings enabled everything is shown in hexadecimal,
        # and hexadecimal values are always handled as unsigned.
        if self.gctx.capstone_string != 0 or hexa:
            hexa = True
            imm = unsigned(imm)

        has_label = self._label(imm, print_colon=False)

        if has_label:
            mem_type = self._dis.mem.get_type(imm)
            typed_xref = (imm in self._dis.xrefs
                          and mem_type not in (MEM_UNK, MEM_ASCII))
            # mem_type == -1 : from the terminal (with -x) there are no
            # xrefs if the file was loaded without a database.
            if typed_xref or mem_type == -1:
                return True

            if mem_type == MEM_ASCII:
                # A known string is always displayed next to its label.
                print_data, force_dont_print_data = True, False

        target = section
        if target is None:
            target = self._binary.get_section(imm)

        # A section starting at 0 is not considered as a valid target.
        if target is not None and target.start == 0:
            target = None

        # For a raw file, if the raw base is 0 the immediate is considered
        # as an address only if it's in the symbols list.
        raw_base_zero = (self._binary.type == T_BIN_RAW
                         and self.gctx.raw_base == 0)

        if target is not None and not raw_base_zero:
            if not has_label:
                self._address(imm, print_colon=False, notprefix=True)

            if print_data and not force_dont_print_data:
                text = self._binary.get_string(imm, self.gctx.max_data_size)
                if text is not None:
                    self._add(" ")
                    self._string('"' + text + '"')

            return True

        if has_label:
            return True

        if op_size == 1:
            self._string("'%s'" % get_char(imm))
            return False

        if hexa:
            self._add(hex(imm))
            return False

        self._add(str(imm))

        if imm > 0:
            pack_format = {4: "<L", 8: "<Q"}.get(op_size)
            if pack_format is None:
                return True

            # Show the little-endian bytes as a string when all of them
            # are printable.
            packed = struct.pack(pack_format, imm)
            if set(packed).issubset(BYTES_PRINTABLE_SET):
                self._string(" \"" + "".join(map(chr, packed)) + "\"")
                return False

        # returns True because capstone print immediate in hexa and
        # it will be printed in a comment, sometimes it's better
        # to have the value in hexa
        return True
예제 #2
0
    def analyze_operands(self, i, func_obj):
        """
        Register the addresses referenced by the operands of instruction `i`.

        For every immediate operand, and every memory operand with a
        non-zero displacement, the referenced address is computed. When it
        belongs to a section whose start is not 0, a cross-reference from
        `i.address` is added and, if the address is not yet known in
        `self.dis.mem`, it is typed as an offset, an ASCII string, a sized
        value or an unknown value. Addresses which look like code are queued
        in `self.pending_not_curr` / `self.msg` for a later analysis.

        x86 operands based on EBP/RBP (without index) are stack variables:
        they are stored in `func_obj[FUNC_VARS]` when `func_obj` is not None
        and never produce a cross-reference.

        i        -- the instruction to inspect (`operands`, `address` and
                    `size` are read).
        func_obj -- function object of the function being analyzed, or None.
        """
        b = self.dis.binary

        for op in i.operands:
            if op.type == self.CS_OP_IMM:
                val = unsigned(op.value.imm)

            elif op.type == self.CS_OP_MEM and op.mem.disp != 0:

                if self.is_x86:
                    # Segment-relative accesses can't be resolved statically.
                    if op.mem.segment != 0:
                        continue
                    if op.mem.index == 0:
                        # Compute the rip register
                        # NOTE(review): unsigned() is applied to the
                        # displacement before the addition, unlike the ARM
                        # branch below -- confirm negative displacements
                        # are resolved as intended.
                        if op.mem.base == self.X86_REG_EIP or \
                            op.mem.base == self.X86_REG_RIP:
                            val = i.address + i.size + unsigned(op.mem.disp)

                        # Check if it's a stack variable
                        elif (op.mem.base == self.X86_REG_EBP or \
                              op.mem.base == self.X86_REG_RBP):
                            if func_obj is not None:
                                ty = self.dis.mem.find_type(op.size)
                                func_obj[FUNC_VARS][op.mem.disp] = [ty, None]
                            # Continue the loop !!
                            continue
                        else:
                            val = unsigned(op.mem.disp)
                    else:
                        val = unsigned(op.mem.disp)

                # TODO: stack variables for arm/mips

                elif self.is_arm:
                    if op.mem.index == 0 and op.mem.base == self.ARM_REG_PC:
                        val = i.address + i.size * 2 + op.mem.disp
                    else:
                        val = op.mem.disp

                elif self.is_mips:
                    if op.mem.base == self.MIPS_REG_GP:
                        # Without a known gp value the address can't be
                        # computed.
                        if self.dis.mips_gp == -1:
                            continue
                        val = op.mem.disp + self.dis.mips_gp
                    else:
                        val = op.mem.disp

                else:
                    # No architecture flag is set: `val` would be unbound
                    # (or still hold the value of the previous operand),
                    # so there is nothing reliable to reference.
                    continue
            else:
                continue

            s = b.get_section(val)
            if s is None or s.start == 0:
                continue

            self.dis.add_xref(i.address, val)

            if not self.dis.mem.exists(val):
                sz = op.size if self.has_op_size else self.default_size
                deref = s.read_int(val, sz)

                # If (*val) is an address
                if deref is not None and b.is_address(deref):
                    ty = MEM_OFFSET
                    self.dis.add_xref(val, deref)

                    if not self.dis.mem.exists(deref):
                        self.dis.mem.add(deref, 1, MEM_UNK)

                        # Do an analysis on this value.
                        if deref not in self.pending and \
                                deref not in self.pending_not_curr and \
                                self.first_inst_are_code(deref):

                            self.pending_not_curr.add(deref)
                            self.msg.put((deref, self.has_prolog(deref), False,
                                          True, None))
                else:
                    # Check if this is an address to a string
                    sz = b.is_string(val)
                    if sz != 0:
                        ty = MEM_ASCII
                    else:
                        sz = op.size if self.has_op_size else self.default_size
                        if op.type == self.CS_OP_MEM:
                            ty = self.dis.mem.find_type(sz)
                        else:
                            ty = MEM_UNK

                self.dis.mem.add(val, sz, ty)

                if ty == MEM_UNK:
                    # Do an analysis on this value, if this is not code
                    # nothing will be done.
                    # jumps and calls are already analyzed in analyze_flow.
                    if val not in self.pending and \
                            not (self.is_jump(i) or self.is_call(i)) and \
                            val not in self.pending_not_curr and \
                            self.first_inst_are_code(val):

                        self.pending_not_curr.add(val)
                        self.msg.put(
                            (val, self.has_prolog(val), False, True, None))
예제 #3
0
    def __sub_analyze_flow(self, entry, inner_code, add_if_code):
        """
        Follow the control flow starting at `entry` and compute the flags of
        the function found there.

        Every reached instruction is stored in `inner_code` (address ->
        instruction). Cross-references are added for immediate jump/call
        targets and for jump tables. Called addresses which are not known
        functions are analyzed through `self.analyze_flow`.

        entry       -- address of the first instruction.
        inner_code  -- dict filled with the instructions reached.
        add_if_code -- when True, the walk stops at the first address which
                       can't be disassembled and every xref added here is
                       removed again.

        Returns -1 if `entry` is not in a section, or if `add_if_code` is
        set and a bad instruction was met. Otherwise returns the function
        flags: `self.import_flags(entry)` for an import, 0 when a return
        path was found, FUNC_FLAG_NORETURN otherwise.
        """
        if self.dis.binary.get_section(entry) is None:
            return -1

        stack = [entry]
        has_ret = False

        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        added_xrefs = []

        while stack:
            ad = stack.pop()
            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            inner_code[ad] = inst

            if self.is_ret(inst):
                self.__add_prefetch(inner_code, inst)
                has_ret = True

            elif self.is_uncond_jump(inst):
                self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]

                if op.type == self.CS_OP_IMM:
                    nxt = unsigned(op.value.imm)
                    self.dis.add_xref(ad, nxt)
                    if nxt in self.functions:
                        # A jump to a noreturn function must not erase a
                        # return found on another path: the result would
                        # depend on the order of the traversal.
                        if not self.is_noreturn(nxt, entry):
                            has_ret = True
                    else:
                        stack.append(nxt)
                    if add_if_code:
                        added_xrefs.append((ad, nxt))
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        self.dis.add_xref(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                    else:
                        # TODO
                        # This is a register or a memory access
                        # we can't say if the function really returns
                        has_ret = True

            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]
                if op.type == self.CS_OP_IMM:
                    # The fall-through path starts after the prefetched
                    # instruction when there is one.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    # NOTE(review): the nested unsigned() looks redundant,
                    # confirm it is idempotent before simplifying.
                    nxt_jmp = unsigned(unsigned(op.value.imm))
                    self.dis.add_xref(ad, nxt_jmp)
                    stack.append(direct_nxt)

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if nxt_jmp in self.functions:
                        # Same rule as above: only ever set has_ret.
                        if not self.is_noreturn(nxt_jmp, entry):
                            has_ret = True
                    else:
                        stack.append(nxt_jmp)

            elif self.is_call(inst):
                op = inst.operands[-1]
                if op.type == self.CS_OP_IMM:
                    imm = unsigned(op.value.imm)
                    self.dis.add_xref(ad, imm)

                    if add_if_code:
                        added_xrefs.append((ad, imm))

                    if imm not in self.functions:
                        self.analyze_flow(imm, True, False, add_if_code)

                    # Nothing is executed after a call to a noreturn
                    # function, don't follow the next instruction.
                    if imm in self.functions and self.is_noreturn(imm, entry):
                        self.__add_prefetch(inner_code, inst)
                        continue

                nxt = inst.address + inst.size
                stack.append(nxt)

            else:
                nxt = inst.address + inst.size
                stack.append(nxt)

        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                self.dis.rm_xrefs(from_ad, to_ad)
            return -1

        # for ELF
        if entry in self.dis.binary.imports:
            flags = self.import_flags(entry)
        elif has_ret:
            flags = 0
        else:
            flags = FUNC_FLAG_NORETURN

        return flags
예제 #4
0
    def __sub_analyze_flow(self, entry, inner_code, add_if_code):
        """
        Follow the control flow starting at `entry` and compute the flags of
        the function found there.

        Every reached instruction is stored in `inner_code` (address ->
        instruction). Cross-references are added through `self.api` for
        immediate jump/call targets and for jump tables. Called addresses
        which are not known functions are analyzed through
        `self.analyze_flow`.

        entry       -- address of the first instruction.
        inner_code  -- dict filled with the instructions reached.
        add_if_code -- when True, the walk stops at the first address which
                       can't be disassembled and every xref added here is
                       removed again.

        Returns -1 if `entry` is not in a section, or if `add_if_code` is
        set and a bad instruction was met. Otherwise returns the function
        flags: `self.import_flags(entry)` for an import, 0 when a return
        path was found, FUNC_FLAG_NORETURN otherwise.
        """
        if self.dis.binary.get_section(entry) is None:
            return -1

        stack = [entry]
        has_ret = False

        # If entry is not "code", we have to rollback added xrefs
        has_bad_inst = False
        added_xrefs = []

        while stack:
            ad = stack.pop()
            inst = self.disasm(ad)

            if inst is None:
                has_bad_inst = True
                if add_if_code:
                    break
                continue

            if ad in inner_code:
                continue

            inner_code[ad] = inst

            if self.is_ret(inst):
                self.__add_prefetch(inner_code, inst)
                has_ret = True

            elif self.is_uncond_jump(inst):
                self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]

                if op.type == self.CS_OP_IMM:
                    nxt = unsigned(op.value.imm)
                    self.api.add_xref(ad, nxt)
                    if self.db.mem.is_func(nxt):
                        # A jump to a noreturn function must not erase a
                        # return found on another path: the result would
                        # depend on the order of the traversal.
                        if not self.is_noreturn(nxt, entry):
                            has_ret = True
                    else:
                        stack.append(nxt)
                    if add_if_code:
                        added_xrefs.append((ad, nxt))
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        self.api.add_xref(ad, table)
                        if add_if_code:
                            added_xrefs.append((ad, table))
                    else:
                        # TODO
                        # This is a register or a memory access
                        # we can't say if the function really returns
                        has_ret = True

            elif self.is_cond_jump(inst):
                prefetch = self.__add_prefetch(inner_code, inst)

                op = inst.operands[-1]
                if op.type == self.CS_OP_IMM:
                    # The fall-through path starts after the prefetched
                    # instruction when there is one.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    # NOTE(review): the nested unsigned() looks redundant,
                    # confirm it is idempotent before simplifying.
                    nxt_jmp = unsigned(unsigned(op.value.imm))
                    self.api.add_xref(ad, nxt_jmp)
                    stack.append(direct_nxt)

                    if add_if_code:
                        added_xrefs.append((ad, nxt_jmp))

                    if self.db.mem.is_func(nxt_jmp):
                        # Same rule as above: only ever set has_ret.
                        if not self.is_noreturn(nxt_jmp, entry):
                            has_ret = True
                    else:
                        stack.append(nxt_jmp)

            elif self.is_call(inst):
                op = inst.operands[-1]
                if op.type == self.CS_OP_IMM:
                    imm = unsigned(op.value.imm)
                    self.api.add_xref(ad, imm)

                    if add_if_code:
                        added_xrefs.append((ad, imm))

                    if not self.db.mem.is_func(imm):
                        self.analyze_flow(imm, True, False, add_if_code)

                    # Nothing is executed after a call to a noreturn
                    # function, don't follow the next instruction.
                    if self.db.mem.is_func(imm) and self.is_noreturn(imm, entry):
                        self.__add_prefetch(inner_code, inst)
                        continue

                nxt = inst.address + inst.size
                stack.append(nxt)

            else:
                nxt = inst.address + inst.size
                stack.append(nxt)

        if add_if_code and has_bad_inst:
            for from_ad, to_ad in added_xrefs:
                self.api.rm_xrefs(from_ad, to_ad)
            return -1

        # for ELF
        if entry in self.dis.binary.imports:
            flags = self.import_flags(entry)
        elif has_ret:
            flags = 0
        else:
            flags = FUNC_FLAG_NORETURN

        return flags
예제 #5
0
    def _imm(self, imm, op_size, hexa, section=None, print_data=True,
             force_dont_print_data=False):
        """
        Print an immediate value as a label, an address, a character or a
        plain number.

        imm      -- the immediate value.
        op_size  -- operand size in bytes; 1 prints the value as a character,
                    4 and 8 enable the "printable bytes" string rendering.
        hexa     -- print plain numbers in hexadecimal (forced when
                    `gctx.capstone_string` is not 0).
        section  -- section containing `imm`; looked up in the binary when
                    None.
        print_data / force_dont_print_data -- control whether the string
                    found at the address is appended after it.

        Returns a boolean. Judging from the comment kept at the end of this
        function, True presumably tells the caller that the hexadecimal form
        may still be worth showing in a comment -- confirm against callers.
        """
        # With capstone strings enabled everything is shown in hexadecimal,
        # and hexadecimal values are always handled as unsigned.
        if self.gctx.capstone_string != 0 or hexa:
            hexa = True
            imm = unsigned(imm)

        has_label = self._label(imm, print_colon=False)

        if has_label:
            mem_type = self._dis.mem.get_type(imm)
            typed_xref = (imm in self._dis.xrefs
                          and mem_type not in (MEM_UNK, MEM_ASCII))
            # mem_type == -1 : from the terminal (with -x) there are no
            # xrefs if the file was loaded without a database.
            if typed_xref or mem_type == -1:
                return True

            if mem_type == MEM_ASCII:
                # A known string is always displayed next to its label.
                print_data, force_dont_print_data = True, False

        target = section
        if target is None:
            target = self._binary.get_section(imm)

        # A section starting at 0 is not considered as a valid target.
        if target is not None and target.start == 0:
            target = None

        # For a raw file, if the raw base is 0 the immediate is considered
        # as an address only if it's in the symbols list.
        raw_base_zero = (self._binary.type == T_BIN_RAW
                         and self.gctx.raw_base == 0)

        if target is not None and not raw_base_zero:
            if not has_label:
                self._address(imm, print_colon=False, notprefix=True)

            if print_data and not force_dont_print_data:
                text = self._binary.get_string(imm, self.gctx.max_data_size)
                if text is not None:
                    self._add(" ")
                    self._string('"' + text + '"')

            return True

        if has_label:
            return True

        if op_size == 1:
            self._string("'%s'" % get_char(imm))
            return False

        if hexa:
            self._add(hex(imm))
            return False

        self._add(str(imm))

        if imm > 0:
            pack_format = {4: "<L", 8: "<Q"}.get(op_size)
            if pack_format is None:
                return True

            # Show the little-endian bytes as a string when all of them
            # are printable.
            packed = struct.pack(pack_format, imm)
            if set(packed).issubset(BYTES_PRINTABLE_SET):
                self._string(" \"" + "".join(map(chr, packed)) + "\"")
                return False

        # returns True because capstone print immediate in hexa and
        # it will be printed in a comment, sometimes it's better
        # to have the value in hexa
        return True
예제 #6
0
    def analyze_operands(self, i, func_obj):
        """
        Register the addresses referenced by the operands of instruction `i`.

        For every immediate operand, and every memory operand with a
        non-zero displacement, the referenced address is computed. When it
        belongs to a section whose start is not 0, a cross-reference from
        `i.address` is added through `self.api` and, if the address is not
        yet known in `self.db.mem`, it is typed as an offset, an ASCII
        string, a sized value or an unknown value. Addresses which look like
        code are queued in `self.pending_not_curr` / `self.msg` for a later
        analysis.

        x86 operands based on EBP/RBP (without index) are stack variables:
        they are stored in `func_obj[FUNC_VARS]` when `func_obj` is not None
        and never produce a cross-reference.

        i        -- the instruction to inspect (`operands`, `address` and
                    `size` are read).
        func_obj -- function object of the function being analyzed, or None.
        """
        b = self.dis.binary

        for op in i.operands:
            if op.type == self.CS_OP_IMM:
                val = unsigned(op.value.imm)

            elif op.type == self.CS_OP_MEM and op.mem.disp != 0:

                if self.is_x86:
                    # Segment-relative accesses can't be resolved statically.
                    if op.mem.segment != 0:
                        continue
                    if op.mem.index == 0:
                        # Compute the rip register
                        # NOTE(review): unsigned() is applied to the
                        # displacement before the addition, unlike the ARM
                        # branch below -- confirm negative displacements
                        # are resolved as intended.
                        if op.mem.base == self.X86_REG_EIP or \
                            op.mem.base == self.X86_REG_RIP:
                            val = i.address + i.size + unsigned(op.mem.disp)

                        # Check if it's a stack variable
                        elif (op.mem.base == self.X86_REG_EBP or \
                              op.mem.base == self.X86_REG_RBP):
                            if func_obj is not None:
                                ty = self.db.mem.find_type(op.size)
                                func_obj[FUNC_VARS][op.mem.disp] = [ty, None]
                            # Continue the loop !!
                            continue
                        else:
                            val = unsigned(op.mem.disp)
                    else:
                        val = unsigned(op.mem.disp)

                # TODO: stack variables for arm/mips

                elif self.is_arm:
                    if op.mem.index == 0 and op.mem.base == self.ARM_REG_PC:
                        val = i.address + i.size * 2 + op.mem.disp
                    else:
                        val = op.mem.disp

                elif self.is_mips:
                    if op.mem.base == self.MIPS_REG_GP:
                        # Without a known gp value the address can't be
                        # computed.
                        if self.dis.mips_gp == -1:
                            continue
                        val = op.mem.disp + self.dis.mips_gp
                    else:
                        val = op.mem.disp

                else:
                    # No architecture flag is set: `val` would be unbound
                    # (or still hold the value of the previous operand),
                    # so there is nothing reliable to reference.
                    continue
            else:
                continue

            s = b.get_section(val)
            if s is None or s.start == 0:
                continue

            self.api.add_xref(i.address, val)

            if not self.db.mem.exists(val):
                sz = op.size if self.has_op_size else self.default_size
                deref = s.read_int(val, sz)

                # If (*val) is an address
                if deref is not None and b.is_address(deref):
                    ty = MEM_OFFSET
                    self.api.add_xref(val, deref)

                    if not self.db.mem.exists(deref):
                        self.db.mem.add(deref, 1, MEM_UNK)

                        # Do an analysis on this value.
                        if deref not in self.pending and \
                                deref not in self.pending_not_curr and \
                                self.first_inst_are_code(deref):

                            self.pending_not_curr.add(deref)
                            self.msg.put(
                                (deref, self.has_prolog(deref), False, True, None))
                else:
                    # Check if this is an address to a string
                    sz = b.is_string(val)
                    if sz != 0:
                        ty = MEM_ASCII
                    else:
                        sz = op.size if self.has_op_size else self.default_size
                        if op.type == self.CS_OP_MEM:
                            ty = self.db.mem.find_type(sz)
                        else:
                            ty = MEM_UNK

                self.db.mem.add(val, sz, ty)

                if ty == MEM_UNK:
                    # Do an analysis on this value, if this is not code
                    # nothing will be done.
                    # jumps and calls are already analyzed in analyze_flow.
                    if val not in self.pending and \
                            not (self.is_jump(i) or self.is_call(i)) and \
                            val not in self.pending_not_curr and \
                            self.first_inst_are_code(val):

                        self.pending_not_curr.add(val)
                        self.msg.put(
                            (val, self.has_prolog(val), False, True, None))
예제 #7
0
    def get_graph(self, entry):
        """
        Build the flow graph of the code reachable from `entry`.

        The code is walked with an explicit stack; each disassembled
        instruction becomes one node of the graph. Immediate jump targets
        which are known functions are not followed, and neither is the
        instruction after a call to a function reported as noreturn.

        Returns a tuple (graph, nb_new_syms). The graph is None (with a
        count of 0) when no node was created. `nb_new_syms` is the value
        returned by `pe_reverse_stripped_list` for PE binaries, 0 otherwise.
        """
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        self.CS_ARCH_MIPS = CS_ARCH_MIPS
        utils = self.load_arch_module().utils

        graph = Graph(self, entry)
        todo = [entry]
        start = time()
        prefetch = None
        visited = set()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while todo:
            ad = todo.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in graph.link_in:
                    for pred in graph.link_in[ad]:
                        graph.link_out[pred].remove(ad)
                    for pred in graph.link_in[ad]:
                        if not graph.link_out[pred]:
                            del graph.link_out[pred]
                    del graph.link_in[ad]
                continue

            if graph.exists(inst):
                continue

            visited.add(ad)

            if utils.is_ret(inst):
                prefetch = self.__add_prefetch(visited, inst)
                graph.new_node(inst, prefetch, None)
                continue

            if utils.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(visited, inst)

                graph.uncond_jumps_set.add(ad)
                target_op = inst.operands[-1]

                if target_op.type == CS_OP_IMM:
                    dest = unsigned(target_op.value.imm)

                    # A jump to a known function ends the path here.
                    if dest in self.functions:
                        successors = None
                    else:
                        todo.append(dest)
                        successors = [dest]
                    graph.new_node(inst, prefetch, successors)

                elif inst.address in self.jmptables:
                    table = self.jmptables[inst.address].table
                    todo.extend(table)
                    graph.new_node(inst, prefetch, table)

                else:
                    # Can't interpret jmp ADDR|reg
                    graph.new_node(inst, prefetch, None)
                continue

            if utils.is_cond_jump(inst):
                prefetch = self.__add_prefetch(visited, inst)

                graph.cond_jumps_set.add(ad)
                target_op = inst.operands[-1]

                if target_op.type != CS_OP_IMM:
                    # Can't interpret jmp ADDR|reg
                    graph.new_node(inst, prefetch, None)
                    continue

                # The fall-through path starts after the prefetched
                # instruction when there is one.
                last = inst if prefetch is None else prefetch
                fallthrough = last.address + last.size

                taken = unsigned(target_op.value.imm)
                todo.append(fallthrough)

                successors = [fallthrough]
                if taken not in self.functions:
                    todo.append(taken)
                    successors.append(taken)
                graph.new_node(inst, prefetch, successors)
                continue

            if ad != entry and utils.is_call(inst):
                called_op = inst.operands[0]
                if called_op.type == CS_OP_IMM:
                    imm = unsigned(called_op.value.imm)
                    if imm in self.functions and self.is_noreturn(imm):
                        prefetch = self.__add_prefetch(visited, inst)
                        graph.new_node(inst, prefetch, None)
                        continue

            following = inst.address + inst.size
            todo.append(following)
            graph.new_node(inst, None, [following])

        if len(graph.nodes) == 0:
            return None, 0

        nb_new_syms = 0
        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_list(self, visited)

        elapsed = time() - start
        debug__("Graph built in %fs (%d instructions)" %
                (elapsed, len(graph.nodes)))

        return graph, nb_new_syms
예제 #8
0
    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """
        Produce a linear dump (code and data) starting at `ctx.entry`.

        ctx   -- context; `ctx.entry` is the first address and `ctx.gctx`
                 gives access to the api and to the `print_bytes` option.
        lines -- maximum number of items to dump, used when `until` is -1.
        until -- when different from -1, dump up to this address (excluded)
                 instead of counting items.

        If `ctx.entry` is not in a section, the dump starts at the next
        section (only when `until` is -1). Sections are not necessarily
        consecutive, so the dump jumps to the next section each time the
        end of one is reached.

        Returns the output object, or None if there is nothing to dump.
        """
        from capstone import CS_OP_IMM

        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ARCH_UTILS = ARCH.utils

        ad = ctx.entry
        s = self.binary.get_section(ad)

        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.section_prefix = True
        o.curr_section = s
        o.mode_dump = True
        l = 0
        api = ctx.gctx.api

        while 1:
            # Section header
            if ad == s.start:
                if not o.is_last_2_line_empty():
                    o._new_line()
                o._dash()
                o._section(s.name)
                o._add("  0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()

            while ((l < lines and until == -1) or (ad < until and until != -1)) \
                    and ad <= s.end:

                ty = self.mem.get_type(ad)

                # A PE import should not be displayed as a subroutine
                if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                        and self.mem.is_code(ad):

                    is_func = ad in self.functions

                    if is_func:
                        if not o.is_last_2_line_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()

                    i = self.lazy_disasm(ad, s.start)

                    # Separate referenced locations with an empty line.
                    if not is_func and ad in self.xrefs and \
                            not o.is_last_2_line_empty():
                        o._new_line()

                    o._asm_inst(i)

                    if ad in self.end_functions:
                        for fad in self.end_functions[ad]:
                            sy = api.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()

                    elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                        o._new_line()

                    elif ARCH_UTILS.is_call(i):
                        op = i.operands[0]
                        if op.type == CS_OP_IMM:
                            imm = unsigned(op.value.imm)
                            if imm in self.functions and self.is_noreturn(imm):
                                o._new_line()

                    ad += i.size

                elif ty == MEM_OFFSET:
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    off = s.read_int(ad, sz)
                    if off is None:
                        # The offset can't be read. Close the line which was
                        # started and skip the item: a bare `continue` would
                        # test the same address again forever.
                        o._new_line()
                        ad += max(sz, 1)
                        l += 1
                        continue
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._data_prefix(sz)
                    o._add(" ")
                    o._imm(off,
                           sz,
                           True,
                           print_data=False,
                           force_dont_print_data=True)
                    o._new_line()
                    ad += sz

                elif ty == MEM_ASCII:
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    buf = self.binary.get_string(ad, sz)
                    if buf is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))
                        o._string('"' + buf + '"')
                    o._add(", 0")
                    o._new_line()
                    ad += sz

                else:
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size_from_type(ty)
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._word(s.read_int(ad, sz), sz)
                    o._new_line()
                    ad += sz

                l += 1

            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                o._new_line()
                ad = s.start
                if until != -1 and ad >= until:
                    break

            if (l >= lines and until == -1) or (ad >= until and until != -1):
                break

            o.curr_section = s

        if until == ad:
            # `s` is None when the loop stopped after the last section.
            if (self.mem.is_code(ad) and ad in self.xrefs) or \
                    (s is not None and ad == s.start):
                if not o.is_last_2_line_empty():
                    o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o
예제 #9
0
    def get_graph(self, entry):
        """
        Build the flow graph of the code reachable from `entry`.

        The exploration uses an explicit work stack of addresses. Every
        decoded instruction becomes one node of the graph; grouping them
        into blocks is done later (see __simplify).

        Successors recorded for each kind of instruction:
          - return: none.
          - unconditional jump: the immediate target, unless that target
            is a known function; for a non-immediate operand, the entries
            of the jump table registered for this instruction, if any.
          - conditional jump: the fall-through address plus the immediate
            target (the target is dropped when it is a known function).
          - call (other than at `entry`) to a known noreturn function:
            none.
          - anything else: the next instruction.

        Returns a tuple (graph, nb_new_syms). nb_new_syms is the value
        returned by pe_reverse_stripped_list for PE binaries and 0
        otherwise. Returns (None, 0) if the graph ends up without nodes.
        """
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        self.CS_ARCH_MIPS = CS_ARCH_MIPS
        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry)
        todo = [entry]
        start = time()
        prefetch = None
        visited = set()

        # WARNING: the jump address is assumed to be the last operand
        # (operands[-1]) on every architecture.

        while todo:
            ad = todo.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Nothing could be decoded here: unlink every instruction
                # which was pointing to this address.
                if ad in gph.link_in:
                    preds = gph.link_in[ad]
                    for src in preds:
                        gph.link_out[src].remove(ad)
                    for src in preds:
                        if not gph.link_out[src]:
                            del gph.link_out[src]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            visited.add(ad)

            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(visited, inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(visited, inst)
                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == CS_OP_IMM:
                    target = unsigned(op.value.imm)
                    if target in self.functions:
                        # Jump to another function: don't follow it.
                        successors = None
                    else:
                        todo.append(target)
                        successors = [target]
                    gph.new_node(inst, prefetch, successors)

                elif inst.address in self.jmptables:
                    table = self.jmptables[inst.address].table
                    todo.extend(table)
                    gph.new_node(inst, prefetch, table)

                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(visited, inst)
                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type != CS_OP_IMM:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)
                else:
                    # The fall-through starts after the prefetched
                    # instruction when there is one.
                    last = inst if prefetch is None else prefetch
                    fallthrough = last.address + last.size
                    taken = unsigned(op.value.imm)

                    todo.append(fallthrough)
                    successors = [fallthrough]

                    if taken not in self.functions:
                        todo.append(taken)
                        successors.append(taken)

                    gph.new_node(inst, prefetch, successors)

            else:
                noreturn_call = False
                if ad != entry and ARCH_UTILS.is_call(inst):
                    op = inst.operands[0]
                    if op.type == CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        noreturn_call = imm in self.functions and \
                            self.is_noreturn(imm)

                if noreturn_call:
                    prefetch = self.__add_prefetch(visited, inst)
                    gph.new_node(inst, prefetch, None)
                else:
                    following = inst.address + inst.size
                    todo.append(following)
                    gph.new_node(inst, None, [following])

        if len(gph.nodes) == 0:
            return None, 0

        nb_new_syms = 0
        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_list(self, visited)

        elapsed = time() - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

        return gph, nb_new_syms
예제 #10
0
    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """
        Produce a linear listing (instructions and data) from `ctx.entry`.

        ctx   -- context; this method reads ctx.entry (start address),
                 ctx.gctx.api and ctx.gctx.print_bytes.
        lines -- maximum number of entries to render; only used when
                 `until` is -1.
        until -- when different from -1, rendering stops once the current
                 address is >= until (the line limit is then ignored).
                 It is != -1 only from the visual mode.

        If the start address is not inside a section, the listing starts
        at the next section (only when until == -1). Sections are not
        required to be consecutive: when the end of a section is reached
        the listing continues at the start of the next one.

        Returns the architecture Output object filled with the listing,
        or None when no section could be found for the start address.
        """
        from capstone import CS_OP_IMM

        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ARCH_UTILS = ARCH.utils

        ad = ctx.entry
        s = self.binary.get_section(ad)

        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.section_prefix = True
        o.curr_section = s
        o.mode_dump = True
        l = 0
        api = ctx.gctx.api

        while 1:
            # Section banner, printed when we are at the first address of
            # the section.
            if ad == s.start:
                if not o.is_last_2_line_empty():
                    o._new_line()
                o._dash()
                o._section(s.name)
                o._add("  0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()

            # Render the current section until the line/address limit is
            # reached or we run past the end of the section.
            while ((l < lines and until == -1) or (ad < until and until != -1)) \
                    and ad <= s.end:

                ty = self.mem.get_type(ad)

                # A PE import should not be displayed as a subroutine
                if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                        and self.mem.is_code(ad):

                    is_func = ad in self.functions

                    if is_func:
                        if not o.is_last_2_line_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()

                    # NOTE(review): lazy_disasm can return None (get_graph
                    # checks for it); the result is used unchecked here.
                    # Confirm it can't happen for addresses flagged as code.
                    i = self.lazy_disasm(ad, s.start)

                    # Separate with an empty line an instruction which is
                    # referenced from elsewhere.
                    if not is_func and ad in self.xrefs and \
                            not o.is_last_2_line_empty():
                        o._new_line()

                    o._asm_inst(i)

                    if ad in self.end_functions:
                        for fad in self.end_functions[ad]:
                            sy = api.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()

                    elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                        o._new_line()

                    elif ARCH_UTILS.is_call(i):
                        # A call to a noreturn function ends the flow, so
                        # it's followed by an empty line too.
                        op = i.operands[0]
                        if op.type == CS_OP_IMM:
                            imm = unsigned(op.value.imm)
                            if imm in self.functions and self.is_noreturn(imm):
                                o._new_line()

                    ad += i.size

                elif ty == MEM_OFFSET:
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    off = s.read_int(ad, sz)
                    # If the value can't be read, only the label/address
                    # is printed. We must still end the line and advance:
                    # restarting the loop with the same `ad` and `l` would
                    # never terminate.
                    if off is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))
                        o._data_prefix(sz)
                        o._add(" ")
                        o._imm(off, sz, True, print_data=False, force_dont_print_data=True)
                    o._new_line()
                    ad += sz

                elif ty == MEM_ASCII:
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size(ad)
                    buf = self.binary.get_string(ad, sz)
                    if buf is not None:
                        if ctx.gctx.print_bytes:
                            o._bytes(s.read(ad, sz))
                        o._string('"' + buf + '"')
                    o._add(", 0")
                    o._new_line()
                    ad += sz

                else:
                    # Any other type: printed as a raw word whose size
                    # depends on the type.
                    o._label_and_address(ad)
                    o.set_line(ad)
                    sz = self.mem.get_size_from_type(ty)
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._word(s.read_int(ad, sz), sz)
                    o._new_line()
                    ad += sz

                l += 1

            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                o._new_line()
                ad = s.start
                if until != -1 and ad >= until:
                    break

            if (l >= lines and until == -1) or (ad >= until and until != -1):
                break

            o.curr_section = s

        if until == ad:
            # `s` is None when the loop stopped because there is no more
            # section after `ad`: don't dereference it in that case.
            if (self.mem.is_code(ad) and ad in self.xrefs) or \
                    (s is not None and ad == s.start):
                if not o.is_last_2_line_empty():
                    o._new_line()

        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        o.join_lines()

        return o