예제 #1
0
class Disassembler():
    def __init__(self, filename, raw_type, raw_base, raw_big_endian):
        import capstone as CAPSTONE

        self.code = {}
        self.binary = Binary(filename, raw_type, raw_base, raw_big_endian)

        arch, mode = self.binary.get_arch()

        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())

        self.binary.load_extra()

        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True
        self.arch = arch
        self.mode = mode


    def check_addr(self, ctx, addr):
        addr_exists, is_exec = self.binary.check_addr(addr)
        if not ctx.print_data and not is_exec:
            raise ExcNotExec(addr)
        if not addr_exists:
            raise ExcNotAddr(addr)


    def load_arch_module(self):
        import capstone as CAPSTONE
        if self.arch == CAPSTONE.CS_ARCH_X86:
            import lib.arch.x86 as ARCH
        elif self.arch == CAPSTONE.CS_ARCH_ARM:
            import lib.arch.arm as ARCH
        elif self.arch == CAPSTONE.CS_ARCH_MIPS:
            import lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH


    def get_addr_from_string(self, opt_addr, raw=False):
        if opt_addr is None:
            if raw:
                return 0
            search = ["main", "_main"]
        else:
            search = [opt_addr]

        for s in search:
            if s.startswith("0x"):
                a = int(opt_addr, 16)
            else:
                a = self.binary.symbols.get(s, -1)

            if a != -1:
                return a

        raise ExcSymNotFound(search[0])


    def print_section_meta(self, name, start, end):
        print_no_end(color_section(name.ljust(20)))
        print_no_end(" [")
        print_no_end(hex(start))
        print_no_end(" - ")
        print_no_end(hex(end))
        print("]")


    def dump_asm(self, ctx, lines):
        from capstone import CS_OP_IMM
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s_name, s_start, s_end = self.binary.get_section_meta(ctx.entry_addr)
        self.print_section_meta(s_name, s_start, s_end)

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # set jumps color
        ad = ctx.entry_addr
        l = 0
        while l < lines and ad < s_end:
            i = self.lazy_disasm(ad, s_start)
            if i is None:
                ad += 1
            else:
                if ARCH_UTILS.is_jump(i) and i.operands[-1].type == CS_OP_IMM:
                    pick_color(i.operands[-1].value.imm)
                ad += i.size
            l += 1

        # Here we have loaded all instructions we want to print
        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        o = ARCH_OUTPUT.Output(ctx)

        # dump
        ad = ctx.entry_addr
        l = 0
        while l < lines and ad < s_end:
            i = self.lazy_disasm(ad, s_start)
            if i is None:
                ad += 1
                o.print_bad(ad)
            else:
                o.print_inst(i)
                ad += i.size
            l += 1


    def dump_data(self, ctx, lines):
        N = 128
        addr = ctx.entry_addr

        s_name, s_start, s_end = self.binary.get_section_meta(ctx.entry_addr)
        self.print_section_meta(s_name, s_start, s_end)

        addr_ascii_str = -1
        ascii_str = []
        l = 0
        is_new_line = True

        while l < lines:
            buf = self.binary.section_stream_read(addr, N)
            if not buf:
                break
            i = 0

            while i < len(buf):
                c = buf[i]

                if c in BYTES_PRINTABLE_SET:
                    if addr_ascii_str == -1:
                        addr_ascii_str = addr
                    ascii_str.append(c)
                    addr += 1
                    i += 1
                    continue

                stack_char = []

                if addr_ascii_str != -1:
                    if c == 0 and len(ascii_str) >= 2:
                        if not is_new_line:
                            print()
                            l += 1
                            if l >= lines:
                                return

                        print_no_end(color_addr(addr_ascii_str))
                        print_no_end(color_string("\"" + "".join(map(get_char, ascii_str)) + "\""))
                        print(", 0")
                        is_new_line = True
                        l += 1
                        if l >= lines:
                            return
                        ascii_str = []
                        addr_ascii_str = -1
                        addr += 1
                        i += 1
                        continue

                    stack_char = ascii_str
                    i -= len(ascii_str)
                    addr -= len(ascii_str)

                stack_char.append(c)

                for c in stack_char:
                    if is_new_line:
                        print_no_end(color_addr(addr))

                    elif addr % 4 == 0 and addr != ctx.entry_addr:
                        print()
                        is_new_line = True
                        l += 1
                        if l >= lines:
                            return
                        print_no_end(color_addr(addr))

                    print_no_end("%.2x " % c)

                    addr += 1
                    i += 1
                    is_new_line = False

                ascii_str = []
                addr_ascii_str = -1

        if not is_new_line:
            print()


    def print_calls(self, ctx):
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s_name, s_start, s_end = self.binary.get_section_meta(ctx.entry_addr)
        self.print_section_meta(s_name, s_start, s_end)
        o = ARCH_OUTPUT.Output(ctx)

        ad = s_start
        while ad < s_end:
            i = self.lazy_disasm(ad, s_start)
            if i is None:
                ad += 1
            else:
                ad += i.size
                if ARCH_UTILS.is_call(i):
                    o.print_inst(i)


    def print_symbols(self, print_sections, sym_filter=None):
        if sym_filter is not None:
            sym_filter = sym_filter.lower()

        for addr in self.binary.reverse_symbols:
            sy = self.binary.reverse_symbols[addr]
            if sym_filter is None or sym_filter in sy.lower():
                sec_name, _ = self.binary.is_address(addr)
                if sy:
                    print_no_end(color_addr(addr) + " " +
                                 color_symbol("<" + sy + ">"))
                    if print_sections and sec_name is not None:
                        print_no_end(" (" + color_section(sec_name) + ")")
                    print()


    def load_user_sym_file(self, fd):
        for l in fd:
            arg = l.split()
            addr = int(arg[0], 16)
            self.binary.reverse_symbols[addr] = arg[1]
            self.binary.symbols[arg[1]] = addr


    def lazy_disasm(self, addr, stay_in_section=-1):
        meta  = self.binary.get_section_meta(addr)
        if meta is None:
            return None

        _, start, _ = meta

        if stay_in_section != -1 and start != stay_in_section:
            return None

        if addr in self.code:
            return self.code[addr]
        
        # Disassemble by block of N bytes
        N = 1024

        d = self.binary.section_stream_read(addr, N)
        gen = self.md.disasm(d, addr)

        first = None
        try:
            first = next(gen)
            self.code[first.address] = first

            # Max N instructions (N is in bytes)
            for n in range(N):
                i = next(gen)
                if i.address in self.code:
                    return first
                self.code[i.address] = i
        except StopIteration:
            pass

        return first


    def __prefetch_inst(self, inst):
        return self.lazy_disasm(inst.address + inst.size)


    # Generate a flow graph of the given function (addr)
    def get_graph(self, addr):
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        ARCH_UTILS = self.load_arch_module().utils

        curr = self.lazy_disasm(addr)
        if curr == None:
            return None

        gph = Graph(self, addr)
        rest = []
        start = time.clock()
        prefetch = None

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        while 1:
            if not gph.exists(curr):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(curr)

                if ARCH_UTILS.is_uncond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[-1].type == CS_OP_IMM:
                        addr = curr.operands[-1].value.imm
                        nxt = self.lazy_disasm(addr)
                        gph.set_next(curr, nxt, prefetch)
                        rest.append(nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr, prefetch)
                    gph.uncond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_cond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[-1].type == CS_OP_IMM:
                        nxt_jump = self.lazy_disasm(curr.operands[-1].value.imm)

                        if self.arch == CS_ARCH_MIPS:
                            direct_nxt = \
                                self.lazy_disasm(prefetch.address + prefetch.size)
                        else:
                            direct_nxt = \
                                self.lazy_disasm(curr.address + curr.size)

                        gph.set_cond_next(curr, nxt_jump, direct_nxt, prefetch)
                        rest.append(nxt_jump.address)
                        rest.append(direct_nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr, prefetch)
                    gph.cond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_ret(curr):
                    gph.add_node(curr, prefetch)

                else:
                    try:
                        nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    except:
                        gph.add_node(curr)
                        pass

            try:
                curr = self.lazy_disasm(rest.pop())
            except IndexError:
                break

        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        elapsed = time.clock()
        elapsed = elapsed - start
        debug__("Graph built in %fs" % elapsed)

        return gph
예제 #2
0
class Disassembler():
    def __init__(self, filename, raw_type):
        import capstone as CAPSTONE

        self.code = {}
        self.binary = Binary(filename, raw_type)
        self.raw_type = raw_type

        arch, mode = self.binary.get_arch()

        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())

        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True
        self.arch = arch
        self.mode = mode


    def check_addr(self, addr):
        addr_exists, is_exec = self.binary.check_addr(addr)
        if not is_exec:
            raise ExcNotExec(addr)
        if not addr_exists:
            raise ExcNotAddr(addr)


    def load_arch_module(self):
        import capstone as CAPSTONE
        if self.arch == CAPSTONE.CS_ARCH_X86:
            import lib.arch.x86 as ARCH
        elif self.arch == CAPSTONE.CS_ARCH_ARM:
            import lib.arch.arm as ARCH
        else:
            raise NotImplementedError
        return ARCH


    def get_addr_from_string(self, opt_addr, raw=False):
        if opt_addr is None:
            if raw:
                return 0
            search = ["main", "_main"]
        else:
            search = [opt_addr]

        for s in search:
            if s.startswith("0x"):
                a = int(opt_addr, 16)
            else:
                a = self.binary.symbols.get(s, -1)

            if a != -1:
                return a

        raise ExcSymNotFound(search[0])


    def dump(self, ctx, lines):
        from capstone import CS_OP_IMM
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s_start = self.binary.get_section_start(ctx.addr)

        # set jumps color
        i = self.lazy_disasm(ctx.addr, s_start)
        l = 0
        while i is not None and l < lines:
            if ARCH_UTILS.is_jump(i) and i.operands[0].type == CS_OP_IMM:
                pick_color(i.operands[0].value.imm)
            i = self.lazy_disasm(i.address + i.size, s_start)
            l += 1

        # Here we have loaded all instructions we want to print
        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        o = ARCH_OUTPUT.Output(ctx)

        # dump
        i = self.lazy_disasm(ctx.addr, s_start)
        l = 0
        while i is not None and l < lines:
            o.print_inst(i, 0)
            i = self.lazy_disasm(i.address + i.size, s_start)
            l += 1


    def print_calls(self, ctx):
        # Print all calls which are in the section containing ctx.addr

        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s_start = self.binary.get_section_start(ctx.addr)
        o = ARCH_OUTPUT.Output(ctx)

        i = self.lazy_disasm(s_start, s_start)
        while i is not None:
            if ARCH_UTILS.is_call(i):
                o.print_inst(i)
            i = self.lazy_disasm(i.address + i.size, s_start)


    def print_symbols(self, print_sections, sym_filter=None):
        if sym_filter is not None:
            sym_filter = sym_filter.lower()

        for addr in self.binary.reverse_symbols:
            sy = self.binary.reverse_symbols[addr]
            if sym_filter is None or sym_filter in sy.lower():
                sec_name, _ = self.binary.is_address(addr)
                print_no_end(color_addr(addr) + " " + color_symbol("<" + sy + ">"))
                if print_sections and sec_name is not None:
                    print_no_end(" (" + color_section(sec_name) + ")")
                print()


    def load_user_sym_file(self, fd):
        for l in fd:
            arg = l.split()
            addr = int(arg[0], 16)
            self.binary.reverse_symbols[addr] = arg[1]
            self.binary.symbols[arg[1]] = addr


    def lazy_disasm(self, addr, stay_in_section=-1):
        if stay_in_section != -1 and \
                self.binary.get_section_start(addr) != stay_in_section:
            return None

        if addr in self.code:
            return self.code[addr]
        
        # Disassemble by block of N bytes
        N = 1024

        d = self.binary.section_stream_read(addr, N)
        gen = self.md.disasm(d, addr)

        first = None
        try:
            first = next(gen)
            self.code[first.address] = first

            # Max N instructions (N is in bytes)
            for n in range(N):
                i = next(gen)
                if i.address in self.code:
                    return first
                self.code[i.address] = i
        except StopIteration:
            pass

        return first


    # Generate a flow graph of the given function (addr)
    def get_graph(self, addr):
        from capstone import CS_OP_IMM
        ARCH_UTILS = self.load_arch_module().utils

        curr = self.lazy_disasm(addr)
        if curr == None:
            return None

        gph = Graph(self, addr)
        rest = []
        start = time.clock()

        while 1:
            if not gph.exists(curr):
                if ARCH_UTILS.is_uncond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[0].type == CS_OP_IMM:
                        addr = curr.operands[0].value.imm
                        nxt = self.lazy_disasm(addr)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr)
                    gph.uncond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_cond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[0].type == CS_OP_IMM:
                        nxt_jump = self.lazy_disasm(curr.operands[0].value.imm)
                        direct_nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_cond_next(curr, nxt_jump, direct_nxt)
                        rest.append(nxt_jump.address)
                        rest.append(direct_nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr)
                    gph.cond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_ret(curr):
                    gph.add_node(curr)

                else:
                    try:
                        nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    except:
                        gph.add_node(curr)
                        pass

            try:
                curr = self.lazy_disasm(rest.pop())
            except IndexError:
                break

        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        elapsed = time.clock()
        elapsed = elapsed - start
        debug__("Graph built in %fs" % elapsed)

        return gph
예제 #3
0
class Disassembler():
    def __init__(self, filename, raw_type, raw_base, raw_big_endian):
        import capstone as CAPSTONE

        self.code = {}
        self.binary = Binary(filename, raw_type, raw_base, raw_big_endian)

        arch, mode = self.binary.get_arch()

        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())

        self.binary.load_extra()

        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True
        self.arch = arch
        self.mode = mode


    def check_addr(self, addr):
        addr_exists, is_exec = self.binary.check_addr(addr)
        if not is_exec:
            raise ExcNotExec(addr)
        if not addr_exists:
            raise ExcNotAddr(addr)


    def load_arch_module(self):
        import capstone as CAPSTONE
        if self.arch == CAPSTONE.CS_ARCH_X86:
            import lib.arch.x86 as ARCH
        elif self.arch == CAPSTONE.CS_ARCH_ARM:
            import lib.arch.arm as ARCH
        elif self.arch == CAPSTONE.CS_ARCH_MIPS:
            import lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH


    def get_addr_from_string(self, opt_addr, raw=False):
        if opt_addr is None:
            if raw:
                return 0
            search = ["main", "_main"]
        else:
            search = [opt_addr]

        for s in search:
            if s.startswith("0x"):
                a = int(opt_addr, 16)
            else:
                a = self.binary.symbols.get(s, -1)

            if a != -1:
                return a

        raise ExcSymNotFound(search[0])


    def print_section_meta(self, name, start, end):
        print_no_end(color_section(name))
        print_no_end(" [")
        print_no_end(hex(start))
        print_no_end(" - ")
        print_no_end(hex(end))
        print("]")


    def dump(self, ctx, lines):
        from capstone import CS_OP_IMM
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s_name, s_start, s_end = self.binary.get_section_meta(ctx.addr)
        self.print_section_meta(s_name, s_start, s_end)

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # set jumps color
        i = self.lazy_disasm(ctx.addr, s_start)
        l = 0
        while i is not None and l < lines:
            if ARCH_UTILS.is_jump(i) and i.operands[-1].type == CS_OP_IMM:
                pick_color(i.operands[-1].value.imm)
            i = self.lazy_disasm(i.address + i.size, s_start)
            l += 1

        # Here we have loaded all instructions we want to print
        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        o = ARCH_OUTPUT.Output(ctx)

        # dump
        i = self.lazy_disasm(ctx.addr, s_start)
        l = 0
        while i is not None and l < lines:
            o.print_inst(i)
            i = self.lazy_disasm(i.address + i.size, s_start)
            l += 1


    def print_calls(self, ctx):
        # Print all calls which are in the section containing ctx.addr

        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s_name, s_start, s_end = self.binary.get_section_meta(ctx.addr)
        self.print_section_meta(s_name, s_start, s_end)
        o = ARCH_OUTPUT.Output(ctx)

        i = self.lazy_disasm(s_start, s_start)
        while i is not None:
            if ARCH_UTILS.is_call(i):
                o.print_inst(i)
            i = self.lazy_disasm(i.address + i.size, s_start)


    def print_symbols(self, print_sections, sym_filter=None):
        if sym_filter is not None:
            sym_filter = sym_filter.lower()

        for addr in self.binary.reverse_symbols:
            sy = self.binary.reverse_symbols[addr]
            if sym_filter is None or sym_filter in sy.lower():
                sec_name, _ = self.binary.is_address(addr)
                if sy:
                    print_no_end(color_addr(addr) + " " +
                                 color_symbol("<" + sy + ">"))
                    if print_sections and sec_name is not None:
                        print_no_end(" (" + color_section(sec_name) + ")")
                    print()


    def load_user_sym_file(self, fd):
        for l in fd:
            arg = l.split()
            addr = int(arg[0], 16)
            self.binary.reverse_symbols[addr] = arg[1]
            self.binary.symbols[arg[1]] = addr


    def lazy_disasm(self, addr, stay_in_section=-1):
        _, start, _ = self.binary.get_section_meta(addr)
        if stay_in_section != -1 and start != stay_in_section:
            return None

        if addr in self.code:
            return self.code[addr]
        
        # Disassemble by block of N bytes
        N = 1024

        d = self.binary.section_stream_read(addr, N)
        gen = self.md.disasm(d, addr)

        first = None
        try:
            first = next(gen)
            self.code[first.address] = first

            # Max N instructions (N is in bytes)
            for n in range(N):
                i = next(gen)
                if i.address in self.code:
                    return first
                self.code[i.address] = i
        except StopIteration:
            pass

        return first


    # Generate a flow graph of the given function (addr)
    def get_graph(self, addr):
        from capstone import CS_OP_IMM
        ARCH_UTILS = self.load_arch_module().utils

        curr = self.lazy_disasm(addr)
        if curr == None:
            return None

        gph = Graph(self, addr)
        rest = []
        start = time.clock()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        while 1:
            if not gph.exists(curr):
                if ARCH_UTILS.is_uncond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[-1].type == CS_OP_IMM:
                        addr = curr.operands[-1].value.imm
                        nxt = self.lazy_disasm(addr)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr)
                    gph.uncond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_cond_jump(curr) and len(curr.operands) > 0:
                    if curr.operands[-1].type == CS_OP_IMM:
                        nxt_jump = self.lazy_disasm(curr.operands[-1].value.imm)
                        direct_nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_cond_next(curr, nxt_jump, direct_nxt)
                        rest.append(nxt_jump.address)
                        rest.append(direct_nxt.address)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.add_node(curr)
                    gph.cond_jumps_set.add(curr.address)

                elif ARCH_UTILS.is_ret(curr):
                    gph.add_node(curr)

                else:
                    try:
                        nxt = self.lazy_disasm(curr.address + curr.size)
                        gph.set_next(curr, nxt)
                        rest.append(nxt.address)
                    except:
                        gph.add_node(curr)
                        pass

            try:
                curr = self.lazy_disasm(rest.pop())
            except IndexError:
                break

        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        elapsed = time.clock()
        elapsed = elapsed - start
        debug__("Graph built in %fs" % elapsed)

        return gph
예제 #4
0
class Disassembler():
    def __init__(self,
                 filename,
                 raw_type,
                 raw_base,
                 raw_big_endian,
                 sym,
                 rev_sym,
                 jmptables,
                 inline_comments,
                 previous_comments,
                 load_symbols=True,
                 mips_gp=-1):
        import capstone as CAPSTONE

        self.code = {}
        self.binary = Binary(filename, raw_type, raw_base, raw_big_endian)

        # TODO: is it a global constant or $gp can change during the execution ?
        self.mips_gp = mips_gp

        arch, mode = self.binary.get_arch()

        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())

        if load_symbols:
            self.binary.load_symbols()
        else:
            self.binary.symbols = sym
            self.binary.reverse_symbols = rev_sym

        self.binary.load_section_names()

        self.capstone = CAPSTONE
        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True
        self.arch = arch
        self.mode = mode
        self.jmptables = jmptables
        self.inline_comments = inline_comments
        self.previous_comments = previous_comments

    def get_unpack_str(self, size_word):
        if self.mode & self.capstone.CS_MODE_BIG_ENDIAN:
            endian = ">"
        else:
            endian = "<"
        if size_word == 1:
            unpack_str = endian + "B"
        elif size_word == 2:
            unpack_str = endian + "H"
        elif size_word == 4:
            unpack_str = endian + "L"
        elif size_word == 8:
            unpack_str = endian + "Q"
        return unpack_str

    def add_symbol(self, addr, name):
        if name in self.binary.symbols:
            last = self.binary.symbols[name]
            del self.binary.reverse_symbols[last]

        self.binary.symbols[name] = addr
        self.binary.reverse_symbols[addr] = name

        return name

    def read_word(self, ad, size_word):
        unpack_str = self.get_unpack_str(size_word)
        b = self.binary.section_stream_read(ad, size_word)
        if len(b) != size_word:
            return 0
        return struct.unpack(unpack_str, b)[0]

    def read_array(self, ad, array_max_size, size_word):
        unpack_str = self.get_unpack_str(size_word)
        N = size_word * array_max_size
        s = self.binary.get_section(ad)
        array = []
        l = 0

        while l < array_max_size:
            buf = self.binary.section_stream_read(ad, N)
            if not buf:
                break

            i = 0
            while i < len(buf):
                b = buf[i:i + size_word]

                if ad > s.end or len(b) != size_word:
                    return array

                w = struct.unpack(unpack_str, b)[0]
                array.append(w)

                ad += size_word
                i += size_word
                l += 1
                if l >= array_max_size:
                    return array
        return array

    def load_arch_module(self):
        if self.arch == self.capstone.CS_ARCH_X86:
            import lib.arch.x86 as ARCH
        elif self.arch == self.capstone.CS_ARCH_ARM:
            import lib.arch.arm as ARCH
        elif self.arch == self.capstone.CS_ARCH_MIPS:
            import lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH

    def get_addr_from_string(self, opt_addr, raw=False):
        if opt_addr is None:
            if raw:
                return 0
            search = ["main", "_main"]
        else:
            search = [opt_addr]

        for s in search:
            if s.startswith("0x"):
                a = int(opt_addr, 16)
            else:
                a = self.binary.symbols.get(s, -1)
                if a == -1:
                    a = self.binary.section_names.get(s, -1)

            if a != -1:
                return a

        raise ExcSymNotFound(search[0])

    def dump_asm(self, ctx, lines):
        from capstone import CS_OP_IMM
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # set jumps color
        ad = ctx.entry_addr
        l = 0
        while l < lines and ad <= s.end:
            i = self.lazy_disasm(ad, s.start)
            if i is None:
                ad += 1
            else:
                if ARCH_UTILS.is_jump(i) and i.operands[-1].type == CS_OP_IMM:
                    pick_color(i.operands[-1].value.imm)
                ad += i.size
            l += 1

        # Here we have loaded all instructions we want to print
        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()

        # dump
        ad = ctx.entry_addr
        l = 0

        if ad in self.binary.reverse_symbols:
            o._symbol(ad)
            o._new_line()

        while l < lines and ad <= s.end:
            i = self.lazy_disasm(ad, s.start)
            if i is None:
                ad += 1
                o._bad(ad)
            else:
                o._asm_inst(i)
                ad += i.size
            l += 1

        # empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        return o

    def dump_data_ascii(self, ctx, lines):
        N = 128  # read by block of 128 bytes
        addr = ctx.entry_addr

        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        l = 0
        ascii_str = []
        addr_str = -1

        while l < lines:
            buf = self.binary.section_stream_read(addr, N)
            if not buf:
                break

            i = 0
            while i < len(buf):

                if addr > s.end:
                    return

                j = i
                while j < len(buf):
                    c = buf[j]
                    if c not in BYTES_PRINTABLE_SET:
                        break
                    if addr_str == -1:
                        addr_str = addr
                    ascii_str.append(c)
                    j += 1

                if c != 0 and j == len(buf):
                    addr += j - i
                    break

                if c == 0 and len(ascii_str) >= 2:
                    print_no_end(color_addr(addr_str))
                    print_no_end(
                        color_string("\"" + "".join(map(get_char, ascii_str)) +
                                     "\""))
                    print(", 0")
                    addr += j - i
                    i = j
                else:
                    print_no_end(color_addr(addr))
                    print("0x%.2x " % buf[i])
                    addr += 1
                    i += 1

                addr_str = -1
                ascii_str = []
                l += 1
                if l >= lines:
                    return

    def dump_data(self, ctx, lines, size_word):
        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        ad = ctx.entry_addr

        for w in self.read_array(ctx.entry_addr, lines, size_word):
            if ad in self.binary.reverse_symbols:
                print(color_symbol(self.binary.reverse_symbols[ad]))
            print_no_end(color_addr(ad))
            print_no_end("0x%.2x" % w)

            section = self.binary.get_section(w)

            if section is not None:
                print_no_end(" (")
                print_no_end(color_section(section.name))
                print_no_end(")")
                if size_word >= 4 and w in self.binary.reverse_symbols:
                    print_no_end(" ")
                    print_no_end(color_symbol(self.binary.reverse_symbols[w]))

            ad += size_word
            print()

    def print_calls(self, ctx):
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()

        ad = s.start
        while ad < s.end:
            i = self.lazy_disasm(ad, s.start)
            if i is None:
                ad += 1
            else:
                ad += i.size
                if ARCH_UTILS.is_call(i):
                    o._asm_inst(i)

        o.print()

    #
    # sym_filter : search a symbol, non case-sensitive
    #    if it starts with '-', it prints non-matching symbols
    #
    def print_symbols(self, print_sections, sym_filter=None):
        if sym_filter is not None:
            sym_filter = sym_filter.lower()
            if sym_filter[0] == "-":
                invert_match = True
                sym_filter = sym_filter[1:]
            else:
                invert_match = False

        for sy in self.binary.symbols:
            addr = self.binary.symbols[sy]
            if sym_filter is None or \
                    (invert_match and sym_filter not in sy.lower()) or \
                    (not invert_match and sym_filter in sy.lower()):

                if sy:
                    section = self.binary.get_section(addr)
                    print_no_end(color_addr(addr) + " " + sy)
                    if print_sections and section is not None:
                        print_no_end(" (" + color_section(section.name) + ")")
                    print()

    def lazy_disasm(self, addr, stay_in_section=-1):
        s = self.binary.get_section(addr)
        if s is None:
            return None

        if stay_in_section != -1 and s.start != stay_in_section:
            return None

        if addr in self.code:
            return self.code[addr]

        # Disassemble by block of N bytes
        N = 1024
        d = self.binary.section_stream_read(addr, N)
        gen = self.md.disasm(d, addr)

        try:
            first = next(gen)
        except StopIteration:
            return None

        for i in gen:
            if i.address in self.code:
                break
            self.code[i.address] = i

        return first

    def __prefetch_inst(self, inst):
        return self.lazy_disasm(inst.address + inst.size)

    # Generate a flow graph of the given function (addr)
    def get_graph(self, entry_addr):
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry_addr)
        stack = [entry_addr]
        start = time()
        prefetch = None

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            if ARCH_UTILS.is_ret(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    nxt = op.value.imm
                    stack.append(nxt)
                    gph.new_node(inst, prefetch, [nxt])
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    if self.arch == CS_ARCH_MIPS:
                        direct_nxt = prefetch.address + prefetch.size
                    else:
                        direct_nxt = inst.address + inst.size

                    nxt_jmp = op.value.imm

                    stack.append(direct_nxt)
                    stack.append(nxt_jmp)
                    gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            else:
                nxt = inst.address + inst.size
                stack.append(nxt)
                gph.new_node(inst, None, [nxt])

        if len(gph.nodes) == 0:
            return None, 0

        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_symbols(self)
        else:
            nb_new_syms = 0

        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph built in %fs (%d instructions)" %
                (elapsed, len(gph.nodes)))

        return gph, nb_new_syms

    def add_jmptable(self, inst_addr, table_addr, entry_size, nb_entries):
        name = self.add_symbol(table_addr, "jmptable_0x%x" % table_addr)

        table = self.read_array(table_addr, nb_entries, entry_size)
        self.jmptables[inst_addr] = Jmptable(inst_addr, table_addr, table,
                                             name)

        self.inline_comments[inst_addr] = "switch statement %s" % name

        all_cases = {}
        for ad in table:
            all_cases[ad] = []

        case = 0
        for ad in table:
            all_cases[ad].append(case)
            case += 1

        for ad in all_cases:
            self.previous_comments[ad] = \
                ["case %s  %s" % (
                    ", ".join(map(str, all_cases[ad])),
                    name
                )]
예제 #5
0
class Disassembler():
    def __init__(self, filename, raw_type, raw_base,
                 raw_big_endian, sym, rev_sym,
                 jmptables, inline_comments,
                 previous_comments, load_symbols=True,
                 mips_gp=-1):
        import capstone as CAPSTONE

        self.code = {}
        self.binary = Binary(filename, raw_type, raw_base, raw_big_endian)

        # TODO: is it a global constant or $gp can change during the execution ?
        self.mips_gp = mips_gp

        arch, mode = self.binary.get_arch()

        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())

        if load_symbols:
            self.binary.load_symbols()
        else:
            self.binary.symbols = sym
            self.binary.reverse_symbols = rev_sym

        self.binary.load_section_names()

        self.capstone = CAPSTONE
        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True
        self.arch = arch
        self.mode = mode
        self.jmptables = jmptables
        self.inline_comments = inline_comments
        self.previous_comments = previous_comments


    def get_unpack_str(self, size_word):
        if self.mode & self.capstone.CS_MODE_BIG_ENDIAN:
            endian = ">"
        else:
            endian = "<"
        if size_word == 1:
            unpack_str = endian + "B"
        elif size_word == 2:
            unpack_str = endian + "H"
        elif size_word == 4:
            unpack_str = endian + "L"
        elif size_word == 8:
            unpack_str = endian + "Q"
        return unpack_str


    def add_symbol(self, addr, name):
        if name in self.binary.symbols:
            last = self.binary.symbols[name]
            del self.binary.reverse_symbols[last]

        self.binary.symbols[name] = addr
        self.binary.reverse_symbols[addr] = name

        return name


    def read_word(self, ad, size_word):
        unpack_str = self.get_unpack_str(size_word)
        b = self.binary.section_stream_read(ad, size_word)
        if len(b) != size_word:
            return 0
        return struct.unpack(unpack_str, b)[0]


    def read_array(self, ad, array_max_size, size_word):
        unpack_str = self.get_unpack_str(size_word)
        N = size_word * array_max_size
        s = self.binary.get_section(ad)
        array = []
        l = 0

        while l < array_max_size:
            buf = self.binary.section_stream_read(ad, N)
            if not buf:
                break

            i = 0
            while i < len(buf):
                b = buf[i:i + size_word]

                if ad > s.end or len(b) != size_word:
                    return array

                w = struct.unpack(unpack_str, b)[0]
                array.append(w)

                ad += size_word
                i += size_word
                l += 1
                if l >= array_max_size:
                    return array
        return array


    def load_arch_module(self):
        if self.arch == self.capstone.CS_ARCH_X86:
            import lib.arch.x86 as ARCH
        elif self.arch == self.capstone.CS_ARCH_ARM:
            import lib.arch.arm as ARCH
        elif self.arch == self.capstone.CS_ARCH_MIPS:
            import lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH


    def get_addr_from_string(self, opt_addr, raw=False):
        if opt_addr is None:
            if raw:
                return 0
            search = ["main", "_main"]
        else:
            search = [opt_addr]

        for s in search:
            if s.startswith("0x"):
                a = int(opt_addr, 16)
            else:
                a = self.binary.symbols.get(s, -1)
                if a == -1:
                    a = self.binary.section_names.get(s, -1)

            if a != -1:
                return a

        raise ExcSymNotFound(search[0])


    def dump_asm(self, ctx, lines):
        from capstone import CS_OP_IMM
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # set jumps color
        ad = ctx.entry_addr
        l = 0
        while l < lines and ad <= s.end:
            i = self.lazy_disasm(ad, s.start)
            if i is None:
                ad += 1
            else:
                if ARCH_UTILS.is_jump(i) and i.operands[-1].type == CS_OP_IMM:
                    pick_color(i.operands[-1].value.imm)
                ad += i.size
            l += 1

        # Here we have loaded all instructions we want to print
        if self.binary.type == T_BIN_PE:
            self.binary.pe_reverse_stripped_symbols(self)

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()

        # dump
        ad = ctx.entry_addr
        l = 0

        if ad in self.binary.reverse_symbols:
            o._symbol(ad)
            o._new_line()

        while l < lines and ad <= s.end:
            i = self.lazy_disasm(ad, s.start)
            if i is None:
                ad += 1
                o._bad(ad)
            else:
                o._asm_inst(i)
                ad += i.size
            l += 1

        # empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)

        return o


    def dump_data_ascii(self, ctx, lines):
        N = 128 # read by block of 128 bytes
        addr = ctx.entry_addr

        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        l = 0
        ascii_str = []
        addr_str = -1

        while l < lines:
            buf = self.binary.section_stream_read(addr, N)
            if not buf:
                break

            i = 0
            while i < len(buf):

                if addr > s.end:
                    return

                j = i
                while j < len(buf):
                    c = buf[j]
                    if c not in BYTES_PRINTABLE_SET:
                        break
                    if addr_str == -1:
                        addr_str = addr
                    ascii_str.append(c)
                    j += 1

                if c != 0 and j == len(buf):
                    addr += j - i
                    break

                if c == 0 and len(ascii_str) >= 2:
                    print_no_end(color_addr(addr_str))
                    print_no_end(color_string(
                            "\"" + "".join(map(get_char, ascii_str)) + "\""))
                    print(", 0")
                    addr += j - i
                    i = j
                else:
                    print_no_end(color_addr(addr))
                    print("0x%.2x " % buf[i])
                    addr += 1
                    i += 1

                addr_str = -1
                ascii_str = []
                l += 1
                if l >= lines:
                    return


    def dump_data(self, ctx, lines, size_word):
        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        ad = ctx.entry_addr

        for w in self.read_array(ctx.entry_addr, lines, size_word):
            if ad in self.binary.reverse_symbols:
                print(color_symbol(self.binary.reverse_symbols[ad]))
            print_no_end(color_addr(ad))
            print_no_end("0x%.2x" % w)

            section = self.binary.get_section(w)

            if section is not None:
                print_no_end(" (")
                print_no_end(color_section(section.name))
                print_no_end(")")
                if size_word >= 4 and w in self.binary.reverse_symbols:
                    print_no_end(" ")
                    print_no_end(color_symbol(self.binary.reverse_symbols[w]))

            ad += size_word
            print()


    def print_calls(self, ctx):
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output

        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()

        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()

        ad = s.start
        while ad < s.end:
            i = self.lazy_disasm(ad, s.start)
            if i is None:
                ad += 1
            else:
                ad += i.size
                if ARCH_UTILS.is_call(i):
                    o._asm_inst(i)

        o.print()


    #
    # sym_filter : search a symbol, non case-sensitive
    #    if it starts with '-', it prints non-matching symbols
    #
    def print_symbols(self, print_sections, sym_filter=None):
        if sym_filter is not None:
            sym_filter = sym_filter.lower()
            if sym_filter[0] == "-":
                invert_match = True
                sym_filter = sym_filter[1:]
            else:
                invert_match = False

        for sy in self.binary.symbols:
            addr = self.binary.symbols[sy]
            if sym_filter is None or \
                    (invert_match and sym_filter not in sy.lower()) or \
                    (not invert_match and sym_filter in sy.lower()):

                if sy:
                    section = self.binary.get_section(addr)
                    print_no_end(color_addr(addr) + " " + sy)
                    if print_sections and section is not None:
                        print_no_end(" (" + color_section(section.name) + ")")
                    print()


    def lazy_disasm(self, addr, stay_in_section=-1):
        s = self.binary.get_section(addr)
        if s is None:
            return None

        if stay_in_section != -1 and s.start != stay_in_section:
            return None

        if addr in self.code:
            return self.code[addr]
        
        # Disassemble by block of N bytes
        N = 1024
        d = self.binary.section_stream_read(addr, N)
        gen = self.md.disasm(d, addr)

        try:
            first = next(gen)
        except StopIteration:
            return None

        for i in gen:
            if i.address in self.code:
                break
            self.code[i.address] = i

        return first


    def __prefetch_inst(self, inst):
        return self.lazy_disasm(inst.address + inst.size)


    # Generate a flow graph of the given function (addr)
    def get_graph(self, entry_addr):
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry_addr)
        stack = [entry_addr]
        start = time()
        prefetch = None

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            if ARCH_UTILS.is_ret(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    nxt = op.value.imm
                    stack.append(nxt)
                    gph.new_node(inst, prefetch, [nxt])
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    if self.arch == CS_ARCH_MIPS:
                        direct_nxt = prefetch.address + prefetch.size
                    else:
                        direct_nxt = inst.address + inst.size

                    nxt_jmp = op.value.imm

                    stack.append(direct_nxt)
                    stack.append(nxt_jmp)
                    gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            else:
                nxt = inst.address + inst.size
                stack.append(nxt)
                gph.new_node(inst, None, [nxt])

        if len(gph.nodes) == 0:
            return None, 0

        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_symbols(self)
        else:
            nb_new_syms = 0

        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

        return gph, nb_new_syms


    def add_jmptable(self, inst_addr, table_addr, entry_size, nb_entries):
        name = self.add_symbol(table_addr, "jmptable_0x%x" % table_addr)

        table = self.read_array(table_addr, nb_entries, entry_size)
        self.jmptables[inst_addr] = Jmptable(inst_addr, table_addr, table, name)

        self.inline_comments[inst_addr] = "switch statement %s" % name

        all_cases = {}
        for ad in table:
            all_cases[ad] = []

        case = 0
        for ad in table:
            all_cases[ad].append(case)
            case += 1

        for ad in all_cases:
            self.previous_comments[ad] = \
                ["case %s  %s" % (
                    ", ".join(map(str, all_cases[ad])),
                    name
                )]