Example #1
0
def resolve_plt(addr, plt_section, exe):
    """Resolve an address inside ``.plt`` to the symbol its stub refers to.

    The bytes at ``addr`` are read from the section's stream and passed to
    ``disasm_plt`` to obtain the address the stub indirects through.  That
    address is then looked up in ``.rela.plt`` (falling back to ``.rel.plt``).

    Args:
        addr: Virtual address inside the ``.plt`` section.
        plt_section: The ``.plt`` section; indexed with ``'sh_addr'`` and
            ``'sh_offset'`` and read through its ``stream`` attribute.
        exe: Executable wrapper providing ``elff`` and
            ``get_section_from_offset``.

    Returns:
        The symbol found in the relocation section, with ``name`` rewritten
        to its demangled form plus ``" (.plt)"``; the result of
        ``get_symbol_by_addr`` when the target lies in ``.text``; otherwise
        ``None``.
    """
    # Translate the virtual address into a position in the section's stream.
    plt_offset = addr - plt_section['sh_addr'] + plt_section['sh_offset']
    plt_section.stream.seek(plt_offset)
    # "execute" instructions in .plt to find indirection
    rela_addr, _size = disasm_plt(plt_section.stream.read(MAX_INSTR_SIZE), addr)
    if not rela_addr:
        return None

    # Prefer .rela.plt; fall back to .rel.plt when it is absent.
    reloc_section = (exe.elff.get_section_by_name(".rela.plt")
                     or exe.elff.get_section_by_name(".rel.plt"))
    if not reloc_section:
        return None

    sym = sym_from_reloc_section(exe, rela_addr, reloc_section)
    if sym:  # found in reloc table
        # NOTE: this mutates the symbol object returned by the lookup.
        sym.name = demangle(sym.name) + " (.plt)"
        return sym

    # not in relocation table
    print("not in reloc table")
    section = exe.get_section_from_offset(rela_addr)
    if section is None:
        # Guard in case no section covers the address; previously this
        # would have failed on ``section.name``.
        return None
    if section.name == ".text":
        # NOTE(review): called as a bare module-level name with a single
        # argument -- confirm this matches the helper defined in this module.
        return get_symbol_by_addr(rela_addr)
    print("Unhandled section: " + section.name)
    return None
def resolve_plt(addr, plt_section, exe):
    """Resolve an address inside ``.plt`` to the symbol its stub refers to.

    The bytes at ``addr`` are read from the section's stream and passed to
    ``disasm_plt`` to obtain the address the stub indirects through.  That
    address is then looked up in ``.rela.plt`` (falling back to ``.rel.plt``).

    Args:
        addr: Virtual address inside the ``.plt`` section.
        plt_section: The ``.plt`` section; indexed with ``'sh_addr'`` and
            ``'sh_offset'`` and read through its ``stream`` attribute.
        exe: Executable wrapper providing ``elff`` and
            ``get_section_from_offset``.

    Returns:
        The symbol found in the relocation section, with ``name`` rewritten
        to its demangled form plus ``" (.plt)"``; the result of
        ``get_symbol_by_addr`` when the target lies in ``.text``; otherwise
        ``None``.
    """
    # Translate the virtual address into a position in the section's stream.
    plt_offset = addr - plt_section['sh_addr'] + plt_section['sh_offset']
    plt_section.stream.seek(plt_offset)
    # "execute" instructions in .plt to find indirection
    rela_addr, _size = disasm_plt(plt_section.stream.read(MAX_INSTR_SIZE), addr)
    if not rela_addr:
        return None

    # Prefer .rela.plt; fall back to .rel.plt when it is absent.
    reloc_section = (exe.elff.get_section_by_name(".rela.plt")
                     or exe.elff.get_section_by_name(".rel.plt"))
    if not reloc_section:
        return None

    sym = sym_from_reloc_section(exe, rela_addr, reloc_section)
    if sym:  # found in reloc table
        # NOTE: this mutates the symbol object returned by the lookup.
        sym.name = demangle(sym.name) + " (.plt)"
        return sym

    # not in relocation table
    print("not in reloc table")
    section = exe.get_section_from_offset(rela_addr)
    if section is None:
        # Guard in case no section covers the address; previously this
        # would have failed on ``section.name``.
        return None
    if section.name == ".text":
        # NOTE(review): called as a bare module-level name with a single
        # argument -- confirm this matches the helper defined in this module.
        return get_symbol_by_addr(rela_addr)
    print("Unhandled section: " + section.name)
    return None
Example #3
0
def getVtable(typeDie):
    """Collect vtable entries from the direct children of a type DIE.

    A child contributes an entry when its tag is ``DW_TAG_subprogram`` and it
    has both a truthy ``DW_AT_virtuality`` and a truthy
    ``DW_AT_vtable_elem_location`` attribute.  Only locations of form
    ``DW_FORM_exprloc`` are decoded; other forms are reported on stdout and
    skipped.

    Args:
        typeDie: DIE whose ``iter_children()`` yields the candidate members.

    Returns:
        A list of ``{"index": ..., "name": ...}`` dicts, in child order.
        ``index`` is the first element of what ``describe_DWARF_expr``
        returns for the location expression; ``name`` is the demangled
        linkage name, else the demangled plain name, else the demangled
        placeholder ``"(Cannot determine name)"``.
    """
    vtable = []
    for child in typeDie.iter_children():
        if child.tag != 'DW_TAG_subprogram':
            continue
        attrs = child.attributes
        if not attrs.get('DW_AT_virtuality'):
            continue
        elem_location = attrs.get('DW_AT_vtable_elem_location')
        if not elem_location:
            continue

        if elem_location.form == 'DW_FORM_exprloc':
            loc_pieces = describe_DWARF_expr(elem_location.value,
                                             child.cu.structs)
            # Prefer the linkage (mangled) name over the plain source name.
            name_attr = (attrs.get('DW_AT_linkage_name')
                         or attrs.get('DW_AT_name'))
            if name_attr:
                name = name_attr.value
            else:
                name = "(Cannot determine name)"
            vtable.append({
                "index": loc_pieces[0],
                "name": demangle(name)
            })
        elif elem_location.form == 'DW_FORM_loclistptr':
            print('Cannot currently handle form DW_FORM_loclistptr')
        else:
            print('Unexpected form {} for vtable_elem_location'.format(
                elem_location.form))
    return vtable
Example #4
0
    def get_symbol_by_addr(self, symbol_addr, instr_addr, instr_size=0, get_sub_symbol=False):
        """Find the symbol that contains ``symbol_addr``.

        On first use the ``.symtab`` symbols are sorted by ``st_value`` and
        cached on the instance (``_symbol_addr_map`` plus the parallel key
        list ``_symbol_addr_map_keys``).  The symbol with the greatest
        ``st_value`` not exceeding ``symbol_addr`` is the only candidate; it
        matches when the address lies in ``[st_value, st_value + st_size)``.
        If it does not match, the address is resolved through the section
        that contains it (``.plt`` / ``.got`` relocations).

        Args:
            symbol_addr: Address to look up.
            instr_addr: Address of the referencing instruction; forwarded to
                ``get_sub_symbol_by_offset``.
            instr_size: Currently unused.
            get_sub_symbol: When true and a symbol matches, also resolve the
                member at the offset inside that symbol.

        Returns:
            A ``(symbol, member_name)`` tuple.  ``member_name`` is ``None``
            unless ``get_sub_symbol`` is set and a symbol matched; ``symbol``
            is ``None`` when nothing could be resolved.
        """
        if self._symbol_addr_map is None:
            symtab = self.elff.get_section_by_name('.symtab')
            # Without a .symtab there is nothing to index; cache an empty
            # table instead of failing on a missing section.
            symbols = list(symtab.iter_symbols()) if symtab is not None else []
            symbols.sort(key=lambda symbol: symbol.entry['st_value'])
            self._symbol_addr_map = symbols
            self._symbol_addr_map_keys = [symbol.entry['st_value'] for symbol in symbols]

        index = bisect_right(self._symbol_addr_map_keys, symbol_addr) - 1
        # index == -1 means the address precedes every symbol (or the table
        # is empty); it must not wrap around to the last entry.
        if index >= 0:
            sym = self._symbol_addr_map[index]
            start = sym.entry['st_value']
            if start <= symbol_addr < start + sym.entry['st_size']:
                if not get_sub_symbol:
                    return (sym, None)
                member_name = self.get_sub_symbol_by_offset(
                    demangle(sym.name).split(':')[-1],
                    symbol_addr - start,
                    instr_addr)
                return (sym, member_name)

        # relocation
        section = self.get_section_from_offset(symbol_addr)
        if section is None:
            # No section covers this address, so nothing can be resolved.
            return (None, None)

        sym = None
        if section.name == ".plt":
            sym = relocs.resolve_plt(symbol_addr, section, self)
        elif section.name == ".got":
            sym = relocs.resolve_got(symbol_addr, section, self)
        elif section.name == ".got.plt":
            print("found a .got.plt")
        else:
            print(section.name)
        return (sym, None)
Example #5
0
def resolve_got(addr, got_section, exe):
    """Resolve a ``.got`` address to a symbol via the dynamic relocations.

    Args:
        addr: Address to look up in the relocation section.
        got_section: The ``.got`` section; accepted for symmetry with
            ``resolve_plt`` but not used here.
        exe: Executable wrapper providing ``elff``.

    Returns:
        The symbol found for ``addr``, with ``name`` rewritten to its
        demangled form plus ``" (.got)"``, or ``None`` when neither
        ``.rela.dyn`` nor ``.rel.dyn`` exists or the address is not in it.
    """
    # is GOT always populated by .dyn?? unclear. TODO
    reloc_section = (exe.elff.get_section_by_name(".rela.dyn")
                     or exe.elff.get_section_by_name(".rel.dyn"))
    if not reloc_section:
        return None

    sym = sym_from_reloc_section(exe, addr, reloc_section)
    if not sym:
        print("not in reloc table")
        return None

    # NOTE: this mutates the symbol object returned by the lookup.
    sym.name = demangle(sym.name) + " (.got)"
    return sym
def resolve_got(addr, got_section, exe):
    """Resolve a ``.got`` address to a symbol via the dynamic relocations.

    Args:
        addr: Address to look up in the relocation section.
        got_section: The ``.got`` section; accepted for symmetry with
            ``resolve_plt`` but not used here.
        exe: Executable wrapper providing ``elff``.

    Returns:
        The symbol found for ``addr``, with ``name`` rewritten to its
        demangled form plus ``" (.got)"``, or ``None`` when neither
        ``.rela.dyn`` nor ``.rel.dyn`` exists or the address is not in it.
    """
    # is GOT always populated by .dyn?? unclear. TODO
    reloc_section = (exe.elff.get_section_by_name(".rela.dyn")
                     or exe.elff.get_section_by_name(".rel.dyn"))
    if not reloc_section:
        return None

    sym = sym_from_reloc_section(exe, addr, reloc_section)
    if not sym:
        print("not in reloc table")
        return None

    # NOTE: this mutates the symbol object returned by the lookup.
    sym.name = demangle(sym.name) + " (.got)"
    return sym
Example #7
0
    def get_all_functions(self):
        """Describe every function symbol as a plain dict.

        Each dict holds ``offset`` (the symbol value rebased from the
        ``.text`` address to the ``.text`` file offset), ``st_value``,
        ``size``, the demangled ``name`` and ``undef`` (true when the
        symbol's section index is ``"SHN_UNDEF"``).

        Returns:
            A list of dicts, one per symbol from ``get_function_syms()``,
            in the same order.
        """
        symbols = self.get_function_syms()

        # *** currently assuming all functions always in .text TODO!!!!!!!!!!!
        # Symbols that do not live in .text can yield a negative offset here.
        text = self.elff.get_section_by_name(".text")

        return [
            {
                "offset": sym["st_value"] - text["sh_addr"] + text["sh_offset"],
                "st_value": sym["st_value"],
                "size": sym["st_size"],
                "name": demangle(sym.name),
                "undef": sym["st_shndx"] == "SHN_UNDEF",
            }
            for sym in symbols
        ]
Example #8
0
def disasm(exe, bytes, offset=0):
    """Disassemble x86-64 code and attach analysis attributes to each instruction.

    Args:
        exe: Executable wrapper; its ``get_symbol_by_addr``,
            ``get_section_from_offset``, ``get_bytes`` and ``elff`` members
            are used to resolve addresses and read referenced data.
        bytes: Raw machine code to disassemble.
        offset: Address assigned to the first decoded instruction.

    Returns:
        The list of decoded instructions.  Attributes are added to each
        instruction only where they apply:

        * ``nop``; ``return_type``
        * ``jump_table``, ``internal_jump``, ``external_jump``,
          ``jump_address``, ``jump_function_*`` and ``comment`` for
          jumps/calls
        * ``rip``, ``rip_offset``, ``rip_resolved``, the ``signed_*`` /
          ``unsigned_*`` / ``hex_*`` / ``float`` / ``double`` views,
          ``rip_value_hex`` and ``rip_value_ascii`` for RIP-relative operands
        * ``ptr``, ``ptr_size`` and ``regs_ptr_explicit`` for memory operands
        * ``regs_explicit``, ``regs_write_implicit``, ``regs_read_implicit``,
          ``short_desc`` and ``docfile`` on every instruction

        ``None`` is returned when capstone raises ``CsError`` (the error is
        printed).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic,
                                     instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True

            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(
                    x86.X86_GRP_CALL):
                target = instr.operands[0]
                # jump table
                if instr.group(
                        x86.X86_GRP_JUMP) and target.type == x86.X86_OP_REG:
                    instr.jump_table = instr.reg_name(target.reg)

                # We can only decode the destination if it's an immediate value
                elif target.type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = target.imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol, _unused = exe.get_symbol_by_addr(
                            dest_addr, instr.address)
                        if symbol:
                            text_sect = exe.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.comment = demangle(symbol.name)
                            # only follow call address if it is a known location
                            if symbol['st_size'] > 0:
                                instr.external_jump = True
                                instr.jump_address = symbol["st_value"]
                                instr.jump_function_name = demangle(
                                    symbol.name)
                                instr.jump_function_address = symbol[
                                    "st_value"]
                                instr.jump_function_offset = symbol[
                                    "st_value"] - sect_addr + sect_offset
                                instr.jump_function_size = symbol['st_size']

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True

            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # RIP-relative addressing is based on the address of the
                    # next instruction.  Derive it from this instruction's
                    # own address and size so the last decoded instruction
                    # does not index past the end of the list.
                    instr.rip_resolved = (instr.address + instr.size +
                                          instr.rip_offset)

                    # file offset depends on section
                    section = exe.get_section_from_offset(instr.rip_resolved)
                    file_offset = instr.rip_resolved - section[
                        "sh_addr"] + section["sh_offset"]

                    # Read in and unpack the first byte at the offset
                    val_8 = exe.get_bytes(file_offset, 1)
                    instr.signed_8 = unpack('b', val_8)[0]
                    instr.unsigned_8 = unpack('B', val_8)[0]
                    instr.hex_8 = hex(instr.unsigned_8)

                    # Read in and unpack the first two bytes at the offset
                    val_16 = exe.get_bytes(file_offset, 2)
                    instr.signed_16 = unpack('h', val_16)[0]
                    instr.unsigned_16 = unpack('H', val_16)[0]
                    instr.hex_16 = hex(instr.unsigned_16)

                    # Read in and unpack the first four bytes at the offset
                    val_32 = exe.get_bytes(file_offset, 4)
                    instr.signed_32 = unpack('i', val_32)[0]
                    instr.unsigned_32 = unpack('I', val_32)[0]
                    instr.hex_32 = hex(instr.unsigned_32)
                    instr.float = unpack('f', val_32)[0]

                    # Read in and unpack the first eight bytes at the offset
                    val_64 = exe.get_bytes(file_offset, 8)
                    instr.signed_64 = unpack('q', val_64)[0]
                    instr.unsigned_64 = unpack('Q', val_64)[0]
                    instr.hex_64 = hex(instr.unsigned_64)
                    instr.double = unpack('d', val_64)[0]

                    symbol, field_name = exe.get_symbol_by_addr(
                        instr.rip_resolved,
                        instr.address,
                        instr_size=op.size,
                        get_sub_symbol=True)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                        if field_name:
                            instr.comment += '.' + field_name

                    # Kept in a local so the ``bytes`` parameter is not
                    # rebound while the function is still running.
                    raw = exe.get_bytes(file_offset, op.size)
                    instr.rip_value_hex = " ".join(
                        hex(ord(char)) for char in raw)
                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with &nbsp (non-breaking space)
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in raw)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, we're temporarily pretending they don't exist. Those edge cases need to be
                    # handled.
                    # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."

                # Handle explicitly read/written registers
                if op.type == x86.X86_OP_MEM:
                    # [base, index, displacement]; a list instead of an
                    # object to guarantee ordering
                    ptr = ["", "", ""]
                    instr.regs_ptr_explicit = []
                    if op.value.mem.base != 0:
                        regname = instr.reg_name(op.value.mem.base)
                        ptr[0] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.index != 0:
                        regname = instr.reg_name(op.value.mem.index)
                        ptr[1] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.disp != 0:
                        ptr[2] = hex(op.value.mem.disp)

                    instr.ptr = ptr
                    instr.ptr_size = op.size
                    instr.regs_explicit.append(instr.ptr)
                elif op.type == x86.X86_OP_REG:
                    instr.regs_explicit.append(instr.reg_name(op.value.reg))
                else:
                    instr.regs_explicit.append("")

            # what registers does this instruction read/write?
            instr.regs_write_implicit = [
                instr.reg_name(reg) for reg in instr.regs_write
            ]
            # Calls are always reported as writing rax.
            rax_name = instr.reg_name(x86.X86_REG_RAX)
            if instr.group(
                    x86.X86_GRP_CALL
            ) and rax_name not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(rax_name)
            instr.regs_read_implicit = [
                instr.reg_name(reg) for reg in instr.regs_read
            ]

            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                # Record instructions that lack documentation.
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(
                        str(datetime.datetime.now()), instr.mnemonic,
                        instr.docfile, instr.short_desc))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)
        return None
Example #9
0
def disasm(exe, bytes, offset=0):
    """Disassemble x86-64 code and attach analysis attributes to each instruction.

    Args:
        exe: Executable wrapper; its ``get_symbol_by_addr``,
            ``get_section_from_offset``, ``get_bytes`` and ``elff`` members
            are used to resolve addresses and read referenced data.
        bytes: Raw machine code to disassemble.
        offset: Address assigned to the first decoded instruction.

    Returns:
        The list of decoded instructions.  Attributes are added to each
        instruction only where they apply:

        * ``nop``; ``return_type``
        * ``jump_table``, ``internal_jump``, ``external_jump``,
          ``jump_address``, ``jump_function_*`` and ``comment`` for
          jumps/calls
        * ``rip``, ``rip_offset``, ``rip_resolved``, the ``signed_*`` /
          ``unsigned_*`` / ``hex_*`` / ``float`` / ``double`` views,
          ``rip_value_hex`` and ``rip_value_ascii`` for RIP-relative operands
        * ``ptr``, ``ptr_size`` and ``regs_ptr_explicit`` for memory operands
        * ``regs_explicit``, ``regs_write_implicit``, ``regs_read_implicit``,
          ``short_desc`` and ``docfile`` on every instruction

        ``None`` is returned when capstone raises ``CsError`` (the error is
        printed).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True

            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                target = instr.operands[0]
                # jump table
                if instr.group(x86.X86_GRP_JUMP) and target.type == x86.X86_OP_REG:
                    instr.jump_table = instr.reg_name(target.reg)

                # We can only decode the destination if it's an immediate value
                elif target.type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = target.imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol, _unused = exe.get_symbol_by_addr(
                            dest_addr,
                            instr.address)
                        if symbol:
                            text_sect = exe.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.comment = demangle(symbol.name)
                            # only follow call address if it is a known location
                            if symbol['st_size'] > 0:
                                instr.external_jump = True
                                instr.jump_address = symbol["st_value"]
                                instr.jump_function_name = demangle(symbol.name)
                                instr.jump_function_address = symbol["st_value"]
                                instr.jump_function_offset = symbol["st_value"] - sect_addr + sect_offset
                                instr.jump_function_size = symbol['st_size']

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True

            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # RIP-relative addressing is based on the address of the
                    # next instruction.  Derive it from this instruction's
                    # own address and size so the last decoded instruction
                    # does not index past the end of the list.
                    instr.rip_resolved = instr.address + instr.size + instr.rip_offset

                    # file offset depends on section
                    section = exe.get_section_from_offset(instr.rip_resolved)
                    file_offset = instr.rip_resolved - section["sh_addr"] + section["sh_offset"]

                    # Read in and unpack the first byte at the offset
                    val_8 = exe.get_bytes(file_offset, 1)
                    instr.signed_8 = unpack('b', val_8)[0]
                    instr.unsigned_8 = unpack('B', val_8)[0]
                    instr.hex_8 = hex(instr.unsigned_8)

                    # Read in and unpack the first two bytes at the offset
                    val_16 = exe.get_bytes(file_offset, 2)
                    instr.signed_16 = unpack('h', val_16)[0]
                    instr.unsigned_16 = unpack('H', val_16)[0]
                    instr.hex_16 = hex(instr.unsigned_16)

                    # Read in and unpack the first four bytes at the offset
                    val_32 = exe.get_bytes(file_offset, 4)
                    instr.signed_32 = unpack('i', val_32)[0]
                    instr.unsigned_32 = unpack('I', val_32)[0]
                    instr.hex_32 = hex(instr.unsigned_32)
                    instr.float = unpack('f', val_32)[0]

                    # Read in and unpack the first eight bytes at the offset
                    val_64 = exe.get_bytes(file_offset, 8)
                    instr.signed_64 = unpack('q', val_64)[0]
                    instr.unsigned_64 = unpack('Q', val_64)[0]
                    instr.hex_64 = hex(instr.unsigned_64)
                    instr.double = unpack('d', val_64)[0]

                    symbol, field_name = exe.get_symbol_by_addr(
                        instr.rip_resolved,
                        instr.address,
                        instr_size=op.size,
                        get_sub_symbol=True)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                        if field_name:
                            instr.comment += '.' + field_name

                    # Kept in a local so the ``bytes`` parameter is not
                    # rebound while the function is still running.
                    raw = exe.get_bytes(file_offset, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(char)) for char in raw)
                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with &nbsp (non-breaking space)
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in raw)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, we're temporarily pretending they don't exist. Those edge cases need to be
                    # handled.
                    # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."

                # Handle explicitly read/written registers
                if op.type == x86.X86_OP_MEM:
                    # [base, index, displacement]; a list instead of an
                    # object to guarantee ordering
                    ptr = ["", "", ""]
                    instr.regs_ptr_explicit = []
                    if op.value.mem.base != 0:
                        regname = instr.reg_name(op.value.mem.base)
                        ptr[0] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.index != 0:
                        regname = instr.reg_name(op.value.mem.index)
                        ptr[1] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.disp != 0:
                        ptr[2] = hex(op.value.mem.disp)

                    instr.ptr = ptr
                    instr.ptr_size = op.size
                    instr.regs_explicit.append(instr.ptr)
                elif op.type == x86.X86_OP_REG:
                    instr.regs_explicit.append(instr.reg_name(op.value.reg))
                else:
                    instr.regs_explicit.append("")

            # what registers does this instruction read/write?
            instr.regs_write_implicit = [instr.reg_name(reg) for reg in instr.regs_write]
            # Calls are always reported as writing rax.
            rax_name = instr.reg_name(x86.X86_REG_RAX)
            if instr.group(x86.X86_GRP_CALL) and rax_name not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(rax_name)
            instr.regs_read_implicit = [instr.reg_name(reg) for reg in instr.regs_read]

            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                # Record instructions that lack documentation.
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(
                        str(datetime.datetime.now()), instr.mnemonic,
                        instr.docfile, instr.short_desc))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)
        return None