def __init__(self,f):
    """Parse an x86/x64 ELF and record its PLT stubs and ``main`` address.

    Populates:
      * ``self.bit``        - 64 for 'x64', 32 for 'x86'
      * ``self.text`` / ``self.text_addr`` - bytes and address of ``.text``
      * ``self.funcs``      - maps each 16-byte ``.plt`` slot address to the
                              dynamic symbol name of its relocation
      * ``self.ep``         - immediate operand of the instruction just
                              before the call to ``__libc_start_main``
                              (only set when such a call is found)
      * ``self.rodata`` / ``self.rodata_data`` - ``.rodata`` section and bytes

    :param f: open binary file object of the ELF; it is closed on success.
    :raises ValueError: if the machine architecture is neither x86 nor x64.
    """
    elf = ELFFile(f)
    arch = elf.get_machine_arch()
    if arch == 'x64':
        self.bit = 64
    elif arch == 'x86':
        self.bit = 32
    else:
        # Previously self.bit stayed unset and the code failed later with an
        # unrelated AttributeError.
        raise ValueError('unsupported ELF architecture: %r' % (arch,))
    is64 = self.bit == 64

    text = elf.get_section_by_name('.text')
    self.text = text.data()
    self.text_addr = text.header['sh_addr']
    dynsym = elf.get_section_by_name('.dynsym')

    # Collect the symbol-table index of every PLT relocation. Slot 0 is a
    # placeholder so that list positions line up with 16-byte .plt slots.
    relplt = elf.get_section_by_name('.rela.plt' if is64 else '.rel.plt')
    relpltdata = relplt.data()
    entsize = relplt.header['sh_entsize']
    sym_indices = [0]
    for off in range(0, len(relpltdata), entsize):
        entry = relpltdata[off:off + entsize]
        if is64:
            # r_offset, r_info split into (type, symbol index), r_addend
            _, _, num, _ = unpack('QIIQ', entry)
        else:
            # r_offset, r_info; the symbol index is r_info >> 8
            _, num = unpack('II', entry)
            num = num >> 8
        sym_indices.append(num)

    # Resolve indices to names in one pass. Every occurrence of an index is
    # translated (list.index() only ever replaced the first one); indices
    # with no matching symbol are left as integers, as before.
    names = dict(enumerate(sym.name for sym in dynsym.iter_symbols()))
    dynsym_list = [names.get(idx, idx) for idx in sym_indices]

    plt = elf.get_section_by_name('.plt')
    plt_addr = plt.header['sh_addr']
    self.funcs = {}
    for n in range(0, len(plt.data()), 16):
        # Floor division keeps the index an int on Python 3 as well.
        self.funcs[plt_addr + n] = dynsym_list[n // 16]

    # md32/md64 are disassembler objects defined elsewhere in the file.
    # 32-bit reads operand 0 of the preceding instruction, 64-bit operand 1
    # (presumably `push main` vs `mov rdi, main` - confirm against targets).
    if is64:
        disassembler, operand_index = md64, 1
    else:
        disassembler, operand_index = md32, 0

    before = None
    for i in disassembler.disasm(self.text, self.text_addr):
        if (i and i.mnemonic == 'call'
                and i.operands[0].imm in self.funcs
                and self.funcs[i.operands[0].imm] == '__libc_start_main'):
            self.ep = before.operands[operand_index].imm
            break
        before = i

    self.rodata = elf.get_section_by_name('.rodata')
    self.rodata_data = self.rodata.data()

    f.close()
Ejemplo n.º 2
0
def process_elf(elf: ELFFile):
    """Yield the global function/object symbols of an ARM ELF file.

    For every symbol table section found by ``find_section``, the symbols
    whose type is ``STT_FUNC`` or ``STT_OBJECT`` and whose binding is
    ``STB_GLOBAL`` are wrapped with ``SimpleSymbol.from_symbol`` and yielded
    in ascending ``st_value`` order (sorted per table, not across tables).

    Raises:
        Exception: if the machine architecture is not "ARM". Because this is
            a generator, the check runs when iteration starts.
    """
    arch = elf.get_machine_arch()
    if arch != "ARM":
        raise Exception("Invalid machine arch {} (not ARM)".format(arch))

    wanted_types = ("STT_FUNC", "STT_OBJECT")
    for table in find_section(elf, SymbolTableSection):
        selected = []
        for entry in table.iter_symbols():
            info = entry['st_info']
            if info['type'] in wanted_types and info['bind'] == "STB_GLOBAL":
                selected.append(SimpleSymbol.from_symbol(entry))

        selected.sort(key=lambda sym: sym.st_value)
        yield from selected
Ejemplo n.º 3
0
def get_frame_base(filename, pc, rebased_addr):
    """
    Look up the CFA offset that applies at a function's start address.

    Scans every FDE in the file's EH call-frame information and picks the
    decoded table row with the smallest ``pc`` that is greater than or equal
    to ``pc - rebased_addr``.

    :param filename: name of the executable file
    :param pc: The address of the beginning of the function
    :param rebased_addr: Should be project.loader.memory.min_addr
    :return: the ``cfa`` offset of the selected row; 0 when no row qualifies;
             None (after printing a notice) when the file has no DWARF info
    """
    target_loc = pc - rebased_addr
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for DWARF processing.
        dwarfinfo = elffile.get_dwarf_info()

        # Needed so the descriptions module decodes register names correctly.
        set_global_machine_arch(elffile.get_machine_arch())

        # Sentinel upper bound: larger than any address expected here.
        closest_pc = 1000000000000000000000
        offset = 0
        for cfi in dwarfinfo.EH_CFI_entries():
            if not isinstance(cfi, FDE):
                continue
            for row in cfi.get_decoded().table:
                row_pc = row['pc']
                if target_loc <= row_pc < closest_pc:
                    offset = row['cfa'].offset
                    closest_pc = row_pc
        return offset
Ejemplo n.º 4
0
def library_to_sqlalchemy(filepath, filename=None):
    """Build ORM entities describing an ELF library and its symbols.

    Args:
        filepath: path of the ELF file to read.
        filename: name stored on the ``Library``; defaults to the basename
            of ``filepath``.

    Returns:
        A ``(library, symbol_entities)`` tuple: the ``Library`` entity and a
        list of ``Symbol`` entities, one per distinct symbol name that has a
        non-empty name and a non-zero ``st_value``. ``.symtab`` entries are
        visited before ``.dynsym`` entries, so the first occurrence wins.

    Raises:
        Exception: if the file has neither ``.symtab`` nor ``.dynsym``.
    """
    # ELF images are binary data: text mode may translate newlines or fail
    # to decode, which corrupts both the checksum and the parse.
    with open(filepath, "rb") as fileobj:
        elf_data = fileobj.read()
    checksum = hash_algo(elf_data).hexdigest()
    if filename is None:
        filename = os.path.basename(filepath)
    # NOTE(review): StringIO must accept raw bytes here (true for the
    # Python 2 StringIO modules) - confirm against the file's imports.
    elf = ELFFile(StringIO(elf_data))
    library = Library(name=filename,
                      checksum=checksum,
                      filepath=filepath,
                      elfclass=elf.elfclass,
                      machine_arch=elf.get_machine_arch())

    tables = [table
              for table in (elf.get_section_by_name(".symtab"),
                            elf.get_section_by_name(".dynsym"))
              if table]
    if not tables:
        raise Exception("No symbol table found")
    symbols = chain.from_iterable(table.iter_symbols() for table in tables)

    seen_symbols = set()
    symbol_entities = []
    for symbol in symbols:
        name = symbol.name
        addr = symbol.entry["st_value"]
        # Skip anonymous symbols, undefined/zero addresses and duplicates.
        if not name or not addr or name in seen_symbols:
            continue
        symbol_entities.append(Symbol(name=name, addr=addr, library=library))
        seen_symbols.add(name)
    return library, symbol_entities
    def _validate_elf(firmware_path: pathlib.Path, platform: Platform) -> None:
        """Check that an ELF firmware matches the running system.

        Two checks are performed: the ELF machine architecture must equal the
        one corresponding to the host reported by ``system_platform.machine()``,
        and the decoder platform corresponding to ``platform`` must be either
        the board type or the board subtype decoded from the firmware.

        Args:
            firmware_path: path of the firmware file to validate.
            platform: platform the firmware is going to be installed on.

        Raises:
            InvalidFirmwareFile: if the file is not a valid ELF, cannot be
                decoded, or does not match the host architecture or platform.
        """
        # Check if firmware's architecture matches system's architecture
        with open(firmware_path, "rb") as file:
            try:
                elf_file = ELFFile(file)
                firm_arch = elf_file.get_machine_arch()
            except Exception as error:
                raise InvalidFirmwareFile(
                    f"Given file is not a valid ELF: {error}") from error
        running_arch = system_platform.machine()
        if firm_arch != get_correspondent_elf_arch(running_arch):
            raise InvalidFirmwareFile(
                f"Firmware's architecture ({firm_arch}) does not match system's ({running_arch})."
            )

        # Check if firmware's platform matches system platform
        try:
            firm_decoder = Decoder()
            firm_decoder.process(firmware_path)
            firm_board = firm_decoder.fwversion.board_type
            firm_sub_board = firm_decoder.fwversion.board_subtype
            current_decoder_platform = get_correspondent_decoder_platform(
                platform)
        except Exception as error:
            raise InvalidFirmwareFile(
                "Given firmware is not a supported version.") from error

        # The mismatch error used to be constructed but never raised, so a
        # firmware for the wrong board was silently accepted. It is raised
        # outside the try block so it is not rewrapped as "not a supported
        # version".
        if current_decoder_platform not in [firm_board, firm_sub_board]:
            raise InvalidFirmwareFile(
                f"Firmware's platform ({current_decoder_platform}) does not match system's ({platform})."
            )
Ejemplo n.º 6
0
Archivo: libdb.py Proyecto: 1uks/libdb
def library_to_sqlalchemy(filepath, filename=None):
    """Create the ``Library`` entity and ``Symbol`` entities for an ELF file.

    Args:
        filepath: path of the ELF file to read.
        filename: library name to record; the basename of ``filepath`` is
            used when omitted.

    Returns:
        ``(library, symbol_entities)``. A symbol is included once per name,
        and only if it has a non-empty name and a non-zero ``st_value``;
        ``.symtab`` is scanned before ``.dynsym``.

    Raises:
        Exception: when neither ``.symtab`` nor ``.dynsym`` is present.
    """
    # Read as bytes: an ELF is binary, and text mode can alter or reject
    # its content, breaking the checksum and the parser.
    with open(filepath, "rb") as fileobj:
        elf_data = fileobj.read()
    checksum = hash_algo(elf_data).hexdigest()
    if filename is None:
        filename = os.path.basename(filepath)
    # NOTE(review): relies on StringIO accepting raw bytes (Python 2
    # StringIO behaviour) - confirm against the file's imports.
    elf = ELFFile(StringIO(elf_data))
    library = Library(name=filename, checksum=checksum, filepath=filepath,
                      elfclass=elf.elfclass, machine_arch=elf.get_machine_arch())

    symtab = elf.get_section_by_name(".symtab")
    dynsym = elf.get_section_by_name(".dynsym")
    present = [section for section in (symtab, dynsym) if section]
    if not present:
        raise Exception("No symbol table found")
    symbols = chain.from_iterable(
        section.iter_symbols() for section in present)

    seen_symbols = set()
    symbol_entities = []
    for symbol in symbols:
        value = symbol.entry["st_value"]
        # Ignore unnamed symbols, zero addresses and names already recorded.
        if not symbol.name or not value or symbol.name in seen_symbols:
            continue
        symbol_entities.append(Symbol(name=symbol.name,
                                      addr=value,
                                      library=library))
        seen_symbols.add(symbol.name)
    return library, symbol_entities
Ejemplo n.º 7
0
def open(io):
    """Parse ``io`` as an ELF file and resolve its architecture.

    Logs the section/segment counts and the resolved architecture through
    ``info``. Note that this function shadows the builtin ``open`` inside
    its module.

    Returns:
        A ``(elf_file, arch)`` tuple, where ``arch`` is the result of
        ``sefi.arch.from_elf_machine_arch`` for the ELF machine architecture.
    """
    parsed = ELFFile(io)
    section_count = parsed.num_sections()
    segment_count = parsed.num_segments()
    info('parsed elf file with %s sections and %s segments' %
         (section_count, segment_count))

    machine = parsed.get_machine_arch()
    arch = sefi.arch.from_elf_machine_arch(machine)
    info('  elf file arch is %s' % (arch))

    return (parsed, arch)
Ejemplo n.º 8
0
def process_file(filename):
    """Print the ELF class, machine architecture and ``.text`` section.

    :param filename: path of the ELF file to inspect.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        # elfclass is a public attribute of ELFFile, read from its header
        print('%s: elfclass is %s' % (filename, elffile.elfclass))
        print(elffile.get_machine_arch())
        ss = elffile.get_section_by_name('.text')
        # Call form matches the prints above: identical output on Python 2,
        # and no longer a syntax error on Python 3.
        print(ss)
Ejemplo n.º 9
0
def process_file(filename):
    """Report basic facts about an ELF file on standard output.

    Prints the ELF class, the machine architecture and the ``.text``
    section object, one per line.

    :param filename: path of the ELF file to inspect.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        # elfclass is a public attribute of ELFFile, read from its header
        print('%s: elfclass is %s' % (filename, elffile.elfclass))
        print(elffile.get_machine_arch())
        ss = elffile.get_section_by_name('.text')
        # Use the function-call form like the other prints; `print ss` only
        # parses on Python 2, while print(ss) behaves the same on both.
        print(ss)
Ejemplo n.º 10
0
def process_file(filename):
    """Print the location information of every DIE attribute that has one.

    For each compile unit the offset and length are printed; then, for each
    attribute that the ``LocationParser`` reports as carrying a location,
    the owning DIE's ``DW_AT_name``, the DIE tag, the attribute name and the
    decoded location (expression or location list) are printed.

    Returns early, after printing a notice, if the file has no DWARF info.

    :param filename: path of the ELF file to process.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo is the starting point for all DWARF processing.
        dwarfinfo = elffile.get_dwarf_info()

        # Location lists come from the .debug_loc section.
        location_lists = dwarfinfo.location_lists()

        # Required for the descriptions module to decode register names
        # found in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Turns DIE attributes into objects describing the location.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # Header fields of a compile unit are read via item lookup.
            print('  Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))
            version = CU['version']

            for DIE in CU.iter_DIEs():
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(attr, version):
                        continue

                    # NOTE(review): a DIE without DW_AT_name raises KeyError
                    # here - confirm whether such DIEs can carry locations.
                    var_name = DIE.attributes['DW_AT_name'].value
                    print(' Varname:%s' % (var_name))
                    print('   DIE %s. attr %s.' % (DIE.tag, attr.name))

                    # Result is a LocationExpr when the attribute holds the
                    # expression itself, or a list when it refers to
                    # .debug_loc.
                    loc = loc_parser.parse_from_attribute(attr, version)
                    if isinstance(loc, LocationExpr):
                        described = describe_DWARF_expr(loc.loc_expr,
                                                        dwarfinfo.structs)
                        print('      %s' % (described))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo,
                                           indent='      '))
Ejemplo n.º 11
0
def META_ELF(s, buff):
   """Collect metadata about an ELF image held in memory.

   ``s`` is accepted but not used by this function.

   Returns a dict with the keys 'Arch', 'Debug Entries', 'Section Names'
   and 'Symbol Names', built from the ELF parsed out of ``buff``.
   """
   elffile = ELFFile(StringIO(buff))

   arch = elffile.get_machine_arch()
   die_entries = get_die_entries(elffile)
   section_names, symbol_names = get_section_names(elffile)

   return { 'Arch' : arch,
            'Debug Entries' : die_entries,
            'Section Names' : section_names,
            'Symbol Names' : symbol_names }
Ejemplo n.º 12
0
    def test_hello(self):
        """Check arch, entry point and section/segment counts of the ARM sample."""
        sample_path = os.path.join('test', 'testfiles', 'simple_gcc.elf.arm')
        with open(sample_path, 'rb') as stream:
            elf = ELFFile(stream)
            self.assertEqual(elf.get_machine_arch(), 'ARM')

            # Expected values below were derived from readelf output
            self.assertEqual(elf['e_entry'], 0x8018)
            self.assertEqual(elf.num_sections(), 14)
            self.assertEqual(elf.num_segments(), 2)
Ejemplo n.º 13
0
def open(io):
    """Wrap ``io`` in an ``ELFFile`` and look up its architecture.

    Two messages are emitted through ``info``: the number of sections and
    segments, and the architecture returned by
    ``sefi.arch.from_elf_machine_arch``. This function shadows the builtin
    ``open`` within its module.

    Returns:
        ``(elf_file, arch)``
    """
    elf_file = ELFFile(io)
    counts = (elf_file.num_sections(), elf_file.num_segments())
    info('parsed elf file with %s sections and %s segments' % counts)

    arch = sefi.arch.from_elf_machine_arch(elf_file.get_machine_arch())
    info('  elf file arch is %s' % (arch))

    return (elf_file, arch)
Ejemplo n.º 14
0
Archivo: elf.py Proyecto: f0wl/REHelper
def print_basic_info(filename: str) -> None:
    """Print a "Basic Information" table for an ELF file.

    The table lists file name and size, ELF architecture and type, the
    MD5/SHA1/SHA256/SSDEEP digests, a VirusTotal link (shortened through
    ``tinyurl`` when that returns a value), whether DWARF info is present,
    entropy, the named sections with their sizes, and the entry point.

    Args:
        filename: path of the ELF file to describe.
    """
    with open(filename, "rb") as f:
        elffile = ELFFile(f)  # ELF object

        # digests and lookup link
        fileMD5 = file_MD5sum(filename)
        filesha1 = file_sha1sum(filename)
        filesha256 = file_sha256sum(filename)
        fileSSDEEP = file_ssdeepsum(filename)
        full_link = "https://www.virustotal.com/gui/file/" + filesha256
        # fall back to the full URL when no short link is returned
        vtlink = tinyurl(full_link) or full_link

        # one "name (size)" chunk per named section, with a line break
        # after every fourth section index
        pieces = []
        for idx in range(elffile.num_sections()):
            section = elffile.get_section(idx)
            if len(section.name) > 0:
                pieces.append("{}{} {}({}) ".format(
                    GREEN, section.name, RESET, hex(section.data_size)))
            if idx > 0 and idx % 4 == 0:
                pieces.append("\n")
        sections = "".join(pieces)
        if not sections:
            sections = RED + "No sections found" + RESET

        # has debug info?
        if elffile.has_dwarf_info():
            debug = GREEN + "Yes" + RESET
        else:
            debug = RED + "No" + RESET

        info_table = [
            ["Filename:", filename],
            ["Filesize:", file_size(filename)],
            ["Filetype:",
             GREEN + "ELF " + str(elffile.get_machine_arch()) + RESET],
            ["Subsystem:",
             GREEN + describe_e_type(elffile.header['e_type']) + RESET],
            ["MD5: ", fileMD5],
            ["SHA1: ", filesha1],
            ["SHA256: ", filesha256],
            ["SSDEEP:", fileSSDEEP],
            ["VT link:", vtlink],
            ["Symbols:", debug],
            ["Entropy:", str(file_entropy(filename))],
            ["Sections:\n(with size)", sections],
            ["Entrypoint:", hex(elffile.header["e_entry"])],
        ]

        table = AsciiTable(title="Basic Information", table_data=info_table)
        print("")
        print(table.table)
        print("")
Ejemplo n.º 15
0
    def test_hello(self):
        """Verify the header-derived properties of the sample ARM ELF."""
        sample_path = os.path.join('test', 'testfiles_for_unittests',
                                   'simple_gcc.elf.arm')
        with open(sample_path, 'rb') as stream:
            elf = ELFFile(stream)
            self.assertEqual(elf.get_machine_arch(), 'ARM')

            # Reference values for this file were taken from readelf
            self.assertEqual(elf['e_entry'], 0x8018)
            self.assertEqual(elf.num_sections(), 14)
            self.assertEqual(elf.num_segments(), 2)
Ejemplo n.º 16
0
    def test_hello(self):
        """Verify the header-derived properties of the sample MIPS ELF."""
        sample_path = os.path.join('test', 'testfiles_for_unittests',
                                   'simple_gcc.elf.mips')
        with open(sample_path, 'rb') as stream:
            elf = ELFFile(stream)
            self.assertEqual(elf.get_machine_arch(), 'MIPS')

            # Reference values for this file were taken from readelf
            self.assertEqual(elf['e_entry'], 0x0)
            self.assertEqual(elf.num_sections(), 25)
            self.assertEqual(elf.num_segments(), 0)
Ejemplo n.º 17
0
class Elf(Binary):
    """ELF binary backed by an ``ELFFile`` opened from a path.

    Only 'x86' and 'x64' machines are accepted (mapped to "i386" and
    "amd64"); any other architecture raises ``KeyError`` from the lookup.
    The underlying file stays open for the lifetime of the object and is
    closed by ``__del__``.
    """

    def __init__(self, filename):
        """Open ``filename``, parse it and load its interpreter, if any.

        If a ``PT_INTERP`` segment exists, its data minus the last byte is
        used as the path of another ``Elf``, stored in ``self.interpreter``.
        """
        super().__init__(filename)
        stream = open(filename, "rb")
        try:
            self.elf = ELFFile(stream)
        except Exception:
            # Do not leak the handle when the file is not a parsable ELF.
            stream.close()
            raise
        self.arch = {
            "x86": "i386",
            "x64": "amd64"
        }[self.elf.get_machine_arch()]
        assert self.elf.header.e_type in ["ET_DYN", "ET_EXEC", "ET_CORE"]

        # Get interpreter elf
        self.interpreter = None
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != "PT_INTERP":
                continue
            # [:-1] drops the final byte of the segment data (presumably the
            # NUL terminator of the interpreter path).
            self.interpreter = Elf(elf_segment.data()[:-1])
            break
        if self.interpreter is not None:
            assert self.interpreter.arch == self.arch
            assert self.interpreter.elf.header.e_type in ["ET_DYN", "ET_EXEC"]

    def __del__(self):
        # __del__ also runs for instances whose __init__ raised before
        # self.elf was assigned, so the attribute may be missing.
        elf = getattr(self, "elf", None)
        if elf is not None:
            elf.stream.close()

    def maps(self):
        """Yield one tuple per non-empty ``PT_LOAD`` segment.

        Each tuple is ``(p_vaddr, p_memsz, perms, stream name, p_offset,
        p_filesz)`` where ``perms`` is a three-character "rwx"-style string.

        Raises:
            BinaryException: if a loadable segment is not readable.
        """
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != "PT_LOAD" or elf_segment.header.p_memsz == 0:
                continue

            flags = elf_segment.header.p_flags
            # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
            perms = ["   ", "  x", " w ", " wx", "r  ", "r x", "rw ",
                     "rwx"][flags & 7]
            if "r" not in perms:
                raise BinaryException(
                    "Not readable map from cgc elf not supported")

            # CGCMAP--
            assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0
            yield ((
                elf_segment.header.p_vaddr,
                elf_segment.header.p_memsz,
                perms,
                elf_segment.stream.name,
                elf_segment.header.p_offset,
                elf_segment.header.p_filesz,
            ))

    def getInterpreter(self):
        """Return the interpreter ``Elf`` found at construction, or None."""
        return self.interpreter

    def threads(self):
        """Yield a single "Running" thread whose EIP is the ELF entry point."""
        yield (("Running", {"EIP": self.elf.header.e_entry}))
Ejemplo n.º 18
0
 def __init__(self, elf_file: ELFFile, debug_root: str = None):
   """Cache DWARF data of ``elf_file`` and build the index.

   Stores the machine architecture, the DWARF info object and its range and
   location lists, creates empty DIE and line-program maps, then calls
   ``self._index()``.

   Args:
     elf_file: parsed ELF file providing the DWARF information.
     debug_root: stored as-is in ``self._debug_root``.
   """
   self._elf_file, self._debug_root = elf_file, debug_root
   self._arch = elf_file.get_machine_arch()

   dwarf = elf_file.get_dwarf_info()
   self._dwarf_info = dwarf
   self._range_lists = dwarf.range_lists()
   self._location_lists = dwarf.location_lists()

   # Filled in later; they start out empty.
   self._die_map, self._line_programs = {}, {}
   self._debug_str = None

   self._logger = logging.getLogger('DWARFData')
   self._index()
Ejemplo n.º 19
0
def process_dwarf_info(in_file, out_file):
    '''
    Main function processing the DWARF information from the debug sections.

    Decodes every compile unit of ``in_file`` into a ``CFG_pb2.Module``,
    adds one global-variable record for each entry of ``GLOBAL_VARIABLES``
    with a positive size, and writes the serialized module to ``out_file``.

    :param in_file: path of the ELF file to read
    :param out_file: path the serialized module is written to
    :return: False if the file has no DWARF info, otherwise None
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Report the input path; this used to format the builtin `file`
            # (a NameError on Python 3, "<type 'file'>" on Python 2).
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        # The previous .format('utf-8') call had no placeholder to fill.
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

        # Only the values are needed; .values() works on Python 2 and 3.
        for value in GLOBAL_VARIABLES.values():
            if value["size"] > 0:
                gvar = M.global_vars.add()
                gvar.name = value["name"]
                gvar.ea = value["addr"]
                gvar.size = value["size"]
            else:
                DEBUG("Look for {}".format(pprint.pformat(value)))

        # SerializeToString() yields raw bytes, so write in binary mode.
        with open(out_file, "wb") as outf:
            outf.write(M.SerializeToString())

    DEBUG("Global Vars\n")
    DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
    DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
    DEBUG("End Global Vars\n")
Ejemplo n.º 20
0
class CGCElf(Binary):
    """CGC binary read through ``ELFFile`` after patching its magic bytes.

    Only i386 ``ET_EXEC`` images are accepted (enforced with assertions).
    """

    @staticmethod
    def _cgc2elf(filename):
        """Return an in-memory copy of the file whose first four bytes are the ELF magic."""
        # hack begin so we can use upstream Elftool
        with open(filename, "rb") as fd:
            raw = fd.read()
            source_name = fd.name
        stream = io.BytesIO(raw)
        # A fresh BytesIO is positioned at 0, so this overwrites the header.
        stream.write(b"\x7fELF")
        stream.name = source_name
        return stream

    def __init__(self, filename):
        """Load ``filename`` and check that it is an i386 executable."""
        super().__init__(filename)
        self.elf = ELFFile(self._cgc2elf(filename))
        arch_names = {"x86": "i386", "x64": "amd64"}
        self.arch = arch_names[self.elf.get_machine_arch()]

        assert "i386" == self.arch
        assert self.elf.header.e_type in ["ET_EXEC"]

    def maps(self):
        """Yield one tuple per non-empty ``PT_LOAD`` segment.

        Each tuple is ``(p_vaddr, p_memsz, perms, stream name, p_offset,
        p_filesz)`` where ``perms`` is a three-character "rwx"-style string.

        Raises:
            BinaryException: on a segment type other than PT_LOAD, PT_NULL,
                PT_PHDR or PT_CGCPOV2, or on a non-readable loadable segment.
        """
        supported_types = ("PT_LOAD", "PT_NULL", "PT_PHDR", "PT_CGCPOV2")
        for segment in self.elf.iter_segments():
            header = segment.header
            if header.p_type not in supported_types:
                raise BinaryException("Not Supported Section")

            if not (header.p_type == "PT_LOAD" and header.p_memsz != 0):
                continue

            flags = header.p_flags
            # PF_R 0x4 Read - PF_W 0x2 Write - PF_X 0x1 Execute
            perms = "".join(
                letter if flags & bit else " "
                for letter, bit in (("r", 4), ("w", 2), ("x", 1)))
            if "r" not in perms:
                raise BinaryException(
                    "Not readable map from cgc elf not supported")

            # CGCMAP--
            assert header.p_filesz != 0 or header.p_memsz != 0
            yield (
                header.p_vaddr,
                header.p_memsz,
                perms,
                segment.stream.name,
                header.p_offset,
                header.p_filesz,
            )

    def threads(self):
        """Yield a single "Running" thread whose EIP is the ELF entry point."""
        yield ("Running", {"EIP": self.elf.header.e_entry})
Ejemplo n.º 21
0
    def _postprocessing_candidates(src_dir: str) -> PostprocessingCandidates:
        """Walk ``src_dir`` and sort its ELF files into post-processing lists.

        * ``strip_arm``: ARM binaries that have a ``.symtab`` section.
        * ``strip_x86``: x86/x64 binaries that have a ``.symtab`` section.
        * ``patch_rm2fb``: ARM binaries with a ``.dynamic`` section whose
          ``.rodata`` contains ``/dev/fb0``.

        Files that are not ELF, or that turn out to be directories, are
        skipped.
        """
        strip_arm, strip_x86, patch_rm2fb = [], [], []

        for directory, _, files in os.walk(src_dir):
            for file_name in files:
                file_path = os.path.join(directory, file_name)

                try:
                    with open(file_path, "rb") as file:
                        info = ELFFile(file)
                        symtab = info.get_section_by_name(".symtab")
                        arch = info.get_machine_arch()

                        if arch == "ARM":
                            if symtab:
                                strip_arm.append(file_path)

                            dynamic = info.get_section_by_name(".dynamic")
                            rodata = info.get_section_by_name(".rodata")

                            if (dynamic and rodata
                                    and b"/dev/fb0" in rodata.data()):
                                patch_rm2fb.append(file_path)
                        elif arch in ("x86", "x64") and symtab:
                            strip_x86.append(file_path)
                except (ELFError, IsADirectoryError):
                    # Ignore non-ELF files and directories
                    continue

        return PostprocessingCandidates(
            strip_arm=strip_arm,
            strip_x86=strip_x86,
            patch_rm2fb=patch_rm2fb,
        )
Ejemplo n.º 22
0
def process_dwarf_info(in_file, out_file):
  '''
    Main function processing the DWARF information from the debug sections.

    Every compile unit of ``in_file`` is decoded into a ``CFG_pb2.Module``;
    each ``GLOBAL_VARIABLES`` entry with a positive size becomes a global
    variable record, and the serialized module is written to ``out_file``.

    :param in_file: path of the ELF file to read
    :param out_file: path the serialized module is written to
    :return: False if the file has no DWARF info, otherwise None
  '''
  DEBUG('Processing file: {0}'.format(in_file))

  with open(in_file, 'rb') as f:
    f_elf = ELFFile(f)
    if not f_elf.has_dwarf_info():
      # Log the input path; the builtin `file` was formatted here before
      # (NameError on Python 3, "<type 'file'>" on Python 2).
      DEBUG("{0} has no debug informations!".format(in_file))
      return False

    M = CFG_pb2.Module()
    # .format('utf-8') was a no-op: the literal has no placeholder.
    M.name = "GlobalVariable"

    set_global_machine_arch(f_elf.get_machine_arch())
    dwarf_info = f_elf.get_dwarf_info()
    process_types(dwarf_info, TYPES_MAP)
    process_frames(dwarf_info, EH_FRAMES)
    section_offset = dwarf_info.debug_info_sec.global_offset

    # Iterate through all the compile units
    for CU in dwarf_info.iter_CUs():
      DEBUG('Found a compile unit at offset {0}, length {1}'.format(CU.cu_offset, CU['unit_length']))
      top_DIE = CU.get_top_DIE()
      c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset, section_offset)
      c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

    # Keys are not used; .values() is available on Python 2 and 3.
    for value in GLOBAL_VARIABLES.values():
      if value["size"] > 0:
        gvar = M.global_vars.add()
        gvar.name = value["name"]
        gvar.ea = value["addr"]
        gvar.size = value["size"]
      else:
        DEBUG("Look for {}".format(pprint.pformat(value)))

    # The serialized protobuf is raw bytes, so open the output as binary.
    with open(out_file, "wb") as outf:
      outf.write(M.SerializeToString())

  DEBUG("Global Vars\n")
  DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
  DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
  DEBUG("End Global Vars\n")
Ejemplo n.º 23
0
    def loadELF(self, filename):
        """Load an ELF file and populate the loader's state.

        Sets ``self.arch``, ``self.entry``, ``self.memory``, ``self.symtab``,
        ``self.thumbtab`` (sorted by first tuple element) and
        ``self.code_addrs`` (sorted by their 'address' key).

        Raises:
            Exception: if the file cannot be opened or parsed as ELF.
        """
        handle = None
        try:
            handle = open(filename, 'rb')
            elf = ELFFile(handle)
        except Exception:
            # `except Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate; the handle is released on failure.
            if handle is not None:
                handle.close()
            raise Exception("[-] This file is not an ELF file: %s" % filename)

        # NOTE(review): the handle is intentionally left open afterwards,
        # since the helpers below may consume the ELF iterators lazily.
        self.arch = elf.get_machine_arch()
        self.entry = elf.header.e_entry
        self.memory = self.load_code_segments(elf.iter_segments(), filename)
        self.symtab, self.thumbtab, self.code_addrs = self.load_section_info(elf.iter_sections())

        self.thumbtab.sort(key=lambda tup: tup[0])
        self.code_addrs = sorted(self.code_addrs, key=lambda k: k['address'])
Ejemplo n.º 24
0
    def __init__(self,
                 image,
                 trace=False,
                 syms=False,
                 timeout=None,
                 preformatted_image=os.path.join('..', 'floppy.img.zip'),
                 argv=None,
                 keep_temps=False,
                 qemu_opts=None):
        """Prepare an emulator model and a floppy image for a kernel ELF.

        Args:
            image: path of the kernel ELF; it must exist.
            trace: stored in ``self.trace``.
            syms: when true, ``self.symbols`` maps ``st_value`` to symbol
                name for every symbol table section of the image.
            timeout: stored in ``self.timeout``.
            preformatted_image: passed to ``Image`` together with the
                temporary image path.
            argv: passed to ``create_grub_conf`` as ``args``.
            keep_temps: stored in ``self.keep_temps``.
            qemu_opts: options handed to ``Qemu``; defaults to an empty list.

        Raises:
            RuntimeError: if the image is not an x86 ELF.
        """
        self.image = image
        self.trace = trace
        self.syms = syms
        self.timeout = timeout
        self.argv = argv
        self.keep_temps = keep_temps
        # A fresh list per instance; a `[]` default would be shared by all.
        self.qemu_opts = qemu_opts if qemu_opts is not None else []

        assert os.path.exists(self.image)

        with open(self.image, 'rb') as fd:
            elffile = ELFFile(fd)
            if elffile.get_machine_arch() == 'x86':
                self.arch = 'X86'
            else:
                # Was `elf.get_machine_arch()`: `elf` is undefined here, so
                # the intended error surfaced as a NameError instead.
                raise RuntimeError("Unknown architecture: %s" %
                                   elffile.get_machine_arch())

            if syms:
                # Get the symbols in the file.
                self.symbols = {}
                for section in elffile.iter_sections():
                    if not isinstance(section, SymbolTableSection):
                        continue

                    for sym in section.iter_symbols():
                        self.symbols[sym['st_value']] = sym.name

        if self.arch == 'X86':
            # The MODEL environment variable selects Bochs over QEMU.
            if os.environ.get('MODEL', '').lower() == 'bochs':
                self.model = Bochs('bochs', [])
            else:
                self.model = Qemu('qemu-system-i386', self.qemu_opts)

        else:
            raise RuntimeError("Unknown architecture: %s" % self.arch)

        # mkstemp returns an open descriptor; only the path is needed.
        fd, self.tmpimage = tempfile.mkstemp()
        os.close(fd)
        self.floppy_image = Image(self.tmpimage, preformatted_image)
        self.floppy_image.create_grub_conf(args=self.argv)
        self.floppy_image.copy(self.image, '/kernel')
Ejemplo n.º 25
0
class Elf(Binary):
    """ELF binary backed by an ``ELFFile`` opened from a path.

    Only 'x86' and 'x64' machines are accepted (mapped to 'i386' and
    'amd64'); any other architecture raises ``KeyError`` from the lookup.
    """

    def __init__(self, filename):
        """Open ``filename``, parse it and load its interpreter, if any.

        If a ``PT_INTERP`` segment exists, its data minus the last byte is
        used as the path of another ``Elf``, stored in ``self.interpreter``.
        """
        super().__init__(filename)
        stream = open(filename, 'rb')
        try:
            self.elf = ELFFile(stream)
        except Exception:
            # Release the handle when the file cannot be parsed as ELF.
            stream.close()
            raise
        self.arch = {
            'x86': 'i386',
            'x64': 'amd64'
        }[self.elf.get_machine_arch()]
        assert self.elf.header.e_type in ['ET_DYN', 'ET_EXEC', 'ET_CORE']

        # Get interpreter elf
        self.interpreter = None

        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != 'PT_INTERP':
                continue
            # [:-1] drops the final byte of the segment data (presumably the
            # NUL terminator of the interpreter path).
            self.interpreter = Elf(elf_segment.data()[:-1])
            break
        if self.interpreter is not None:
            assert self.interpreter.arch == self.arch
            assert self.interpreter.elf.header.e_type in ['ET_DYN', 'ET_EXEC']

    def maps(self):
        """Yield one tuple per non-empty ``PT_LOAD`` segment.

        Each tuple is ``(p_vaddr, p_memsz, perms, stream name, p_offset,
        p_filesz)`` where ``perms`` is a three-character 'rwx'-style string.

        Raises:
            Exception: if a loadable segment is not readable.
        """
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != 'PT_LOAD' or elf_segment.header.p_memsz == 0:
                continue

            flags = elf_segment.header.p_flags
            # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
            perms = ['   ', '  x', ' w ', ' wx', 'r  ', 'r x', 'rw ',
                     'rwx'][flags & 7]
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            # CGCMAP--
            assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0
            yield ((elf_segment.header.p_vaddr, elf_segment.header.p_memsz,
                    perms, elf_segment.stream.name,
                    elf_segment.header.p_offset, elf_segment.header.p_filesz))

    def getInterpreter(self):
        """Get the dynamic linker
        Returns the dynamic linker(if it is specified) as an :obj:`Elf` object otherwise, return none.

        :rtype: :obj:`Elf` or None
        """
        return self.interpreter

    def threads(self):
        """Yield a single 'Running' thread whose EIP is the ELF entry point."""
        yield (('Running', {'EIP': self.elf.header.e_entry}))
Ejemplo n.º 26
0
    def loadELF(self, filename):
        """Parse ``filename`` as ELF and fill in the loader attributes.

        Assigns ``self.arch``, ``self.entry``, ``self.memory``,
        ``self.symtab``, ``self.thumbtab`` (sorted by first tuple element)
        and ``self.code_addrs`` (sorted by their 'address' key).

        Raises:
            Exception: if the file cannot be opened or parsed as ELF.
        """
        stream = None
        try:
            stream = open(filename, 'rb')
            elf = ELFFile(stream)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not converted; close the handle instead of leaking it.
            if stream is not None:
                stream.close()
            raise Exception("[-] This file is not an ELF file: %s" % filename)

        # NOTE(review): the stream stays open on success because the helpers
        # below may read from the ELF iterators lazily.
        self.arch = elf.get_machine_arch()
        self.entry = elf.header.e_entry
        self.memory = self.load_code_segments(elf.iter_segments(), filename)
        self.symtab, self.thumbtab, self.code_addrs = self.load_section_info(
            elf.iter_sections())

        self.thumbtab.sort(key=lambda tup: tup[0])
        self.code_addrs = sorted(self.code_addrs, key=lambda k: k['address'])
Ejemplo n.º 27
0
    def parse_elf(self):
        """Parse the ELF named by ``self.f`` and collect its PLT symbols.

        ``self.f`` is replaced by the opened file object, which is always
        closed before returning. Sets ``self.md`` (Capstone handle),
        ``self.align`` and, when a ``.plt`` section exists,
        ``self.plt_start``, ``self.plt_end`` and ``self.plti``.

        Returns:
            None for an unsupported architecture; the result of
            ``parse_symbols`` on ``.dynsym`` when ``self.to_look`` is set;
            otherwise ``self.plts``.
        """
        # read binary from command line
        stream = self.f = open(self.f, 'rb')
        try:
            elff = ELFFile(stream)

            arch = elff.get_machine_arch()
            if arch == "x64":
                cs_arch = CS_ARCH_X86
                cs_mode = CS_MODE_64
            elif arch == "x86":
                cs_arch = CS_ARCH_X86
                cs_mode = CS_MODE_32
            else:
                print("ELF architecture '%s' currently not supported" % arch)
                return

            # Initialize capstone
            self.md = Cs(cs_arch, cs_mode)

            # Difference between virtual address and file offset, taken
            # from section 1.
            s = elff.get_section(1)
            self.align = s['sh_addr'] - s['sh_offset']

            s = elff.get_section_by_name('.plt')
            if not s:
                print('No plt table (.plt)')
                return self.plts

            print('.plt')
            self.plt_start, self.plt_end = s[
                'sh_addr'], s['sh_addr'] + s['sh_size']
            # First stub after the 16-byte entry at the start of .plt
            self.plti = self.plt_start + 16
            print('0x%x 0x%x' % (self.plt_start, self.plt_end))

            s = elff.get_section_by_name('.dynsym')
            if s:
                print(s.name)
                syms = self.parse_symbols(s)
                if self.to_look:
                    return syms
            else:
                print('No Dynamic Symbols table (.dynsym)')

            s = elff.get_section_by_name('.symtab')
            if s:
                print(s.name)
                self.parse_symbols(s)
            else:
                print('No Symbols Table (.symtab)')

            return self.plts
        finally:
            # Runs on every exit path, including the unsupported-arch return
            # and exceptions, which previously left the file open.
            stream.close()
Ejemplo n.º 28
0
def get_func_bounds(filename, function_name):
    """Look up the address range of *function_name* in the DWARF info.

    Walks every DIE of every compile unit and inspects those tagged
    ``DW_TAG_subprogram``.  When the recorded ``DW_AT_high_pc`` is smaller
    than ``DW_AT_low_pc`` it is treated as an offset from the low address.

    Returns:
        ``(low_addr, high_addr)`` for the first subprogram whose
        ``DW_AT_name`` equals *function_name*; ``None`` when the file has no
        DWARF info or no subprogram matches.
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        # Location lists decoded from the .debug_loc section.
        loc_lists = dwarf.location_lists()

        # Needed so register names in DWARF expressions decode correctly.
        set_global_machine_arch(elf.get_machine_arch())

        # Constructed as part of the setup sequence; its result is not
        # consulted below.
        LocationParser(loc_lists)

        for unit in dwarf.iter_CUs():
            for die in unit.iter_DIEs():
                if die.tag != "DW_TAG_subprogram":
                    continue

                # Defaults used when an attribute is absent from the DIE.
                found = {
                    "DW_AT_name": "",
                    "DW_AT_low_pc": 0,
                    "DW_AT_high_pc": 0,
                }
                for attribute in itervalues(die.attributes):
                    if attribute.name in found:
                        found[attribute.name] = attribute.value

                start = found["DW_AT_low_pc"]
                end = found["DW_AT_high_pc"]
                if end < start:
                    end = start + end

                if found["DW_AT_name"] == function_name:
                    return (start, end)
Ejemplo n.º 29
0
    def test_basic(self):
        """The MIPS sample is identified as MIPS and its header matches readelf."""
        sample_path = os.path.join('test', 'testfiles_for_unittests',
                                   'simple_gcc.elf.mips')
        with open(sample_path, 'rb') as stream:
            mips_elf = ELFFile(stream)
            self.assertEqual(mips_elf.get_machine_arch(), 'MIPS')

            # Further properties of this file, as reported by readelf.
            self.assertEqual(mips_elf['e_entry'], 0x0)
            self.assertEqual(mips_elf.num_sections(), 25)
            self.assertEqual(mips_elf.num_segments(), 0)

            # MIPS-specific section types are only resolved when e_machine
            # identifies the file as MIPS.
            ninth_section = mips_elf.get_section(9)
            self.assertEqual(ninth_section['sh_type'], 'SHT_MIPS_DWARF')
Ejemplo n.º 30
0
def process_file(filename):
    """Print every location list referenced from the DWARF info of *filename*.

    For each compile unit its offset and length are printed; for each DIE
    attribute that ``attribute_has_location_list`` accepts, the decoded
    location list is printed.  Returns ``None``; prints a notice and returns
    early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        # Location lists decoded from the .debug_loc section.
        loc_lists = dwarf.location_lists()

        # Needed so register names in DWARF expressions decode correctly.
        set_global_machine_arch(elf.get_machine_arch())

        for unit in dwarf.iter_CUs():
            # Header fields of a compile unit are read by item lookup.
            print('  Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            for die in unit.iter_DIEs():
                for attribute in itervalues(die.attributes):
                    if not attribute_has_location_list(attribute):
                        continue

                    # The attribute value is an offset into .debug_loc.
                    entries = loc_lists.get_location_list_at_offset(
                        attribute.value)
                    rendered = show_loclist(entries, dwarf, indent='      ')

                    print('   DIE %s. attr %s.\n%s' % (
                        die.tag, attribute.name, rendered))
Ejemplo n.º 31
0
class CGCElf(Binary):
    """Binary wrapper for CGC executables, read through pyelftools."""

    @staticmethod
    def _cgc2elf(filename):
        """Return an in-memory copy of *filename* starting with the ELF magic.

        The first four bytes of the copy are overwritten with ``\\x7fELF`` so
        that the upstream ELF parser accepts the file; the stream carries the
        original file name in its ``name`` attribute.
        """
        with open(filename, 'rb') as source:
            patched = io.BytesIO(source.read())
            patched.write(b'\x7fELF')
            patched.name = source.name
        return patched

    def __init__(self, filename):
        """Parse *filename* and require a 32-bit x86 ``ET_EXEC`` image."""
        super().__init__(filename)
        self.elf = ELFFile(self._cgc2elf(filename))
        arch_names = {'x86': 'i386', 'x64': 'amd64'}
        self.arch = arch_names[self.elf.get_machine_arch()]

        assert 'i386' == self.arch
        assert self.elf.header.e_type in ['ET_EXEC']

    def maps(self):
        """Yield one mapping tuple per non-empty ``PT_LOAD`` segment.

        Each tuple is ``(vaddr, memsz, perms, stream_name, offset, filesz)``.
        Raises ``Exception`` for an unexpected segment type or for a load
        segment that is not readable.
        """
        accepted_types = ('PT_LOAD', 'PT_NULL', 'PT_PHDR', 'PT_CGCPOV2')
        for segment in self.elf.iter_segments():
            header = segment.header
            if header.p_type not in accepted_types:
                raise Exception("Not Supported Section")

            if header.p_type != 'PT_LOAD' or header.p_memsz == 0:
                continue

            # PF_R 0x4 Read - PF_W 0x2 Write - PF_X 0x1 Execute
            flags = header.p_flags
            perms = ''.join(
                letter if flags & bit else ' '
                for letter, bit in (('r', 4), ('w', 2), ('x', 1)))
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            # CGCMAP--
            assert header.p_filesz != 0 or header.p_memsz != 0
            yield (header.p_vaddr, header.p_memsz, perms,
                   segment.stream.name, header.p_offset, header.p_filesz)

    def threads(self):
        """Yield the single initial thread: its state and entry-point EIP."""
        initial_registers = {'EIP': self.elf.header.e_entry}
        yield ('Running', initial_registers)
Ejemplo n.º 32
0
def process_file(filename):
    """Dump the location lists found in the DWARF info of *filename*.

    Prints the offset and length of each compile unit and, for every DIE
    attribute accepted by ``attribute_has_location_list``, the decoded
    location list.  Returns ``None``; when the file carries no DWARF info a
    notice is printed and nothing else happens.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as elf_stream:
        parsed = ELFFile(elf_stream)

        if not parsed.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # Starting point for all DWARF-based processing.
        info = parsed.get_dwarf_info()

        # LocationLists object built from the .debug_loc section.
        lists = info.location_lists()

        # Lets the descriptions module decode register names correctly.
        set_global_machine_arch(parsed.get_machine_arch())

        for cu in info.iter_CUs():
            print('  Found a compile unit at offset %s, length %s' % (
                cu.cu_offset, cu['unit_length']))

            for entry in cu.iter_DIEs():
                for value in itervalues(entry.attributes):
                    if not attribute_has_location_list(value):
                        continue

                    # value.value is an offset into .debug_loc.
                    decoded = lists.get_location_list_at_offset(value.value)
                    text = show_loclist(decoded, info, indent='      ')

                    print('   DIE %s. attr %s.\n%s' % (
                        entry.tag, value.name, text))
Ejemplo n.º 33
0
Archivo: run.py Proyecto: berkus/JMTK
    def __init__(
        self,
        image,
        trace=False,
        syms=False,
        timeout=None,
        preformatted_image=os.path.join("..", "floppy.img.zip"),
        argv=None,
        keep_temps=False,
    ):
        """Prepare an emulator run for the kernel ELF at *image*.

        Args:
            image: path to the kernel ELF; must exist.
            trace, timeout, keep_temps: stored unchanged on the instance.
            syms: when truthy, ``self.symbols`` is filled with an
                address -> name map from every symbol table in the ELF.
            preformatted_image: passed to ``Image`` as the base floppy image.
            argv: passed to ``create_grub_conf`` as ``args``.

        Raises:
            RuntimeError: if the ELF is not an x86 image.
        """
        self.image = image
        self.trace = trace
        self.syms = syms
        self.timeout = timeout
        self.argv = argv
        self.keep_temps = keep_temps

        assert os.path.exists(self.image)

        with open(self.image, "rb") as fd:
            elffile = ELFFile(fd)
            machine = elffile.get_machine_arch()
            if machine == "x86":
                self.arch = "X86"
            else:
                # Report the detected architecture; referencing an undefined
                # name here would mask the real error with a NameError.
                raise RuntimeError("Unknown architecture: %s" % machine)

            if syms:
                # Get the symbols in the file.
                self.symbols = {}
                for section in elffile.iter_sections():
                    if not isinstance(section, SymbolTableSection):
                        continue

                    for sym in section.iter_symbols():
                        self.symbols[sym["st_value"]] = sym.name

        if self.arch == "X86":
            self.model = Qemu("qemu-system-i386", [])
        else:
            raise RuntimeError("Unknown architecture: %s" % self.arch)

        # mkstemp is used only to reserve a unique path; the descriptor is
        # closed immediately and the path handed to Image.
        fd, self.tmpimage = tempfile.mkstemp()
        os.close(fd)
        self.floppy_image = Image(self.tmpimage, preformatted_image)
        self.floppy_image.create_grub_conf(args=self.argv)
        self.floppy_image.copy(self.image, "/kernel")
Ejemplo n.º 34
0
def get_executable_arch(path):
    """
    Report the machine architecture of an ELF executable.

    Parameters
    ----------
    path : str
        Location of the binary to inspect; converted with ``str()`` before
        being opened.

    Returns
    -------
    str
        Architecture name as reported by ``ELFFile.get_machine_arch()``.
    """
    with open(str(path), "rb") as binary:
        parsed = ELFFile(binary)
        arch_name = parsed.get_machine_arch()
        return arch_name
    def test_basic(self):
        """Verify architecture detection and header values for the MIPS sample."""
        fixture = os.path.join('test', 'testfiles_for_unittests',
                               'simple_gcc.elf.mips')
        with open(fixture, 'rb') as handle:
            parsed = ELFFile(handle)
            self.assertEqual(parsed.get_machine_arch(), 'MIPS')

            # Values below were derived from readelf output for this file.
            self.assertEqual(parsed['e_entry'], 0x0)
            self.assertEqual(parsed.num_sections(), 25)
            self.assertEqual(parsed.num_segments(), 0)

            # MIPS-only section types are available only because e_machine
            # marks the file as MIPS.
            section_nine = parsed.get_section(9)
            self.assertEqual(section_nine['sh_type'], 'SHT_MIPS_DWARF')
Ejemplo n.º 36
0
def process_file(filename):
    """Collect every DIE of *filename*, keyed by its offset.

    Returns:
        An ``OrderedDict`` mapping ``DIE.offset`` to the DIE, in the order
        the compile units and their DIEs are iterated.

    Raises:
        ValueError: if the file has no DWARF info.
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            raise ValueError(filename + ' has no DWARF info')

        # DWARFInfo is the entry point for all DWARF processing.
        dwarf = elf.get_dwarf_info()

        # Needed so register names in DWARF expressions decode correctly.
        set_global_machine_arch(elf.get_machine_arch())

        dies_by_offset = OrderedDict(
            (die.offset, die)
            for unit in dwarf.iter_CUs()
            for die in unit.iter_DIEs()
        )
    return dies_by_offset
Ejemplo n.º 37
0
Archivo: run.py Proyecto: RobinVan/JMTK
    def __init__(self, image, trace=False, syms=False, timeout=None,
                 preformatted_image=os.path.join('..','floppy.img.zip'),
                 argv=None, keep_temps=False, qemu_opts=None):
        """Prepare an emulator run for the kernel ELF at *image*.

        Args:
            image: path to the kernel ELF; must exist.
            trace, timeout, keep_temps: stored unchanged on the instance.
            syms: when truthy, ``self.symbols`` is filled with an
                address -> name map from every symbol table in the ELF.
            preformatted_image: passed to ``Image`` as the base floppy image.
            argv: passed to ``create_grub_conf`` as ``args``.
            qemu_opts: extra options handed to ``Qemu``; ``None`` (the
                default) means a fresh empty list, so instances never share
                one mutable default.

        The emulator is ``Bochs`` when the ``MODEL`` environment variable is
        ``bochs`` (case-insensitive), otherwise ``Qemu``.

        Raises:
            RuntimeError: if the ELF is not an x86 image.
        """
        self.image = image
        self.trace = trace
        self.syms = syms
        self.timeout = timeout
        self.argv = argv
        self.keep_temps = keep_temps
        self.qemu_opts = [] if qemu_opts is None else qemu_opts

        assert os.path.exists(self.image)

        with open(self.image, 'rb') as fd:
            elffile = ELFFile(fd)
            machine = elffile.get_machine_arch()
            if machine == 'x86':
                self.arch = 'X86'
            else:
                # Report the detected architecture; referencing an undefined
                # name here would mask the real error with a NameError.
                raise RuntimeError("Unknown architecture: %s" % machine)

            if syms:
                # Get the symbols in the file.
                self.symbols = {}
                for section in elffile.iter_sections():
                    if not isinstance(section, SymbolTableSection):
                        continue

                    for sym in section.iter_symbols():
                        self.symbols[sym['st_value']] = sym.name

        if self.arch == 'X86':
            if os.environ.get('MODEL', '').lower() == 'bochs':
                self.model = Bochs('bochs', [])
            else:
                self.model = Qemu('qemu-system-i386', self.qemu_opts)

        else:
            raise RuntimeError("Unknown architecture: %s" % self.arch)

        # mkstemp is used only to reserve a unique path; the descriptor is
        # closed immediately and the path handed to Image.
        fd, self.tmpimage = tempfile.mkstemp()
        os.close(fd)
        self.floppy_image = Image(self.tmpimage, preformatted_image)
        self.floppy_image.create_grub_conf(args=self.argv)
        self.floppy_image.copy(self.image, '/kernel')
Ejemplo n.º 38
0
class Elf(Binary):
    """Binary wrapper for regular ELF files, read through pyelftools."""

    def __init__(self, filename):
        """Parse *filename* and, if present, its ``PT_INTERP`` interpreter.

        Raises ``KeyError`` for architectures other than x86/x64 and
        ``AssertionError`` for unexpected ELF types or an interpreter whose
        architecture differs from the main binary.
        """
        super(Elf, self).__init__(filename)
        # open(..., 'rb') instead of the Python 2-only file() builtin: ELF is
        # binary data and this spelling works on both Python 2 and 3.
        # NOTE(review): the stream is deliberately left open; maps() still
        # reads elf_segment.stream later.
        self.elf = ELFFile(open(filename, 'rb'))
        self.arch = {'x86':'i386','x64':'amd64'}[self.elf.get_machine_arch()]
        assert self.elf.header.e_type in ['ET_DYN', 'ET_EXEC', 'ET_CORE']


        #Get interpreter elf
        self.interpreter = None
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != 'PT_INTERP':
                continue
            # The last byte of the segment is dropped before the data is used
            # as a path (presumably the terminating NUL; confirm).
            self.interpreter = Elf(elf_segment.data()[:-1])
            break
        if self.interpreter is not None:
            assert self.interpreter.arch == self.arch
            assert self.interpreter.elf.header.e_type in ['ET_DYN', 'ET_EXEC']


    def maps(self):
        """Yield one mapping tuple per non-empty ``PT_LOAD`` segment.

        Each tuple is ``(vaddr, memsz, perms, stream_name, offset, filesz)``.
        Raises ``Exception`` when a load segment is not readable.
        """
        for elf_segment in self.elf.iter_segments():
            if elf_segment.header.p_type != 'PT_LOAD' or elf_segment.header.p_memsz == 0:
                continue

            flags = elf_segment.header.p_flags
            #PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
            perms = ['   ', '  x', ' w ', ' wx', 'r  ', 'r x', 'rw ', 'rwx'][flags&7]
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            #CGCMAP--
            assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0 
            yield((elf_segment.header.p_vaddr,
                  elf_segment.header.p_memsz,
                  perms, 
                  elf_segment.stream.name, elf_segment.header.p_offset, elf_segment.header.p_filesz))

    def getInterpreter(self):
        """Return the ``Elf`` for the ``PT_INTERP`` interpreter, or ``None``."""
        return self.interpreter

    def threads(self):
        """Yield the single initial thread: its state and entry-point EIP."""
        yield(('Running', {'EIP': self.elf.header.e_entry}))
Ejemplo n.º 39
0
class CGCElf(Binary):
    """Binary wrapper for CGC executables, read through pyelftools."""

    @staticmethod
    def _cgc2elf(filename):
        """Return an in-memory copy of *filename* starting with the ELF magic.

        The leading four bytes of the copy are overwritten with ``\\x7fELF``
        so the upstream ELF parser accepts the file; the stream carries the
        original file name in its ``name`` attribute.
        """
        with open(filename, 'rb') as source:
            patched = StringIO.StringIO(source.read())
            patched.write('\x7fELF')
            patched.name = source.name
        return patched

    def __init__(self, filename):
        """Parse *filename* and require a 32-bit x86 ``ET_EXEC`` image."""
        super(CGCElf, self).__init__(filename)
        self.elf = ELFFile(self._cgc2elf(filename))
        arch_names = {'x86': 'i386', 'x64': 'amd64'}
        self.arch = arch_names[self.elf.get_machine_arch()]

        assert 'i386' == self.arch
        assert self.elf.header.e_type in ['ET_EXEC']

    def maps(self):
        """Yield one mapping tuple per non-empty ``PT_LOAD`` segment.

        Each tuple is ``(vaddr, memsz, perms, stream_name, offset, filesz)``.
        Raises ``Exception`` for an unexpected segment type or for a load
        segment that is not readable.
        """
        accepted_types = ('PT_LOAD', 'PT_NULL', 'PT_PHDR', 'PT_CGCPOV2')
        permission_table = ('   ', '  x', ' w ', ' wx',
                            'r  ', 'r x', 'rw ', 'rwx')
        for segment in self.elf.iter_segments():
            header = segment.header
            if header.p_type not in accepted_types:
                raise Exception("Not Supported Section")

            if header.p_type != 'PT_LOAD' or header.p_memsz == 0:
                continue

            # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read
            perms = permission_table[header.p_flags & 7]
            if 'r' not in perms:
                raise Exception("Not readable map from cgc elf not supported")

            # CGCMAP--
            assert header.p_filesz != 0 or header.p_memsz != 0
            yield (header.p_vaddr, header.p_memsz, perms,
                   segment.stream.name, header.p_offset, header.p_filesz)

    def threads(self):
        """Yield the single initial thread: its state and entry-point EIP."""
        initial_registers = {'EIP': self.elf.header.e_entry}
        yield ('Running', initial_registers)
Ejemplo n.º 40
0
def main(argv):
    """Parse the command line, detect the ELF architecture and print a payload.

    Args:
        argv: argument list without the program name.  Accepts
            ``-b``/``--elf_file`` (path to the ELF) and ``-m``/``--mode``
            (``10`` for x86, ``20`` for x64).

    Prints the help text and exits with status 2 on an option error, an
    empty argument list, or a missing ELF path.  Prints "not supported" plus
    the help text when mode and architecture do not match.
    """
    elf_file = ""
    mode = ""
    try:
        opts, _ = getopt.getopt(argv, "b:m:", ["elf_file=", "mode="])
    except getopt.GetoptError:
        help_msg()
        sys.exit(2)

    if len(argv) == 0:
        help_msg()
        sys.exit(2)

    for opt, arg in opts:
        # Compare against the exact option spellings: a substring test such
        # as '"-b" in opt' never matches "--elf_file", which silently dropped
        # the long form.
        if opt in ("-b", "--elf_file"):
            elf_file = arg
        elif opt in ("-m", "--mode"):
            mode = arg

    if not elf_file:
        # Without a path, open() below would fail with an unrelated error.
        help_msg()
        sys.exit(2)

    print("[+] load elf     : " + elf_file)
    print("[+] using mode   : " + str(mode))
    with open(elf_file, 'rb') as f:
        # Query the architecture while the underlying stream is still open.
        binarch = ELFFile(f).get_machine_arch()

    if mode == '10' and binarch == "x86":
        payload = load_elfx86(elf_file)
        print("\n\n")
        print(payload)

    elif mode == "20" and binarch == "x64":
        payload = load_elfx64(elf_file)
        print("\n\n")
        print(payload)
    else:
        print("[!] not supported")
        help_msg()
Ejemplo n.º 41
0
def main(path_to_sample):
    """Process the DWARF info of *path_to_sample* and write ``funcs.json``.

    Populates the module globals ``dwarf_info``, ``location_lists`` and
    ``call_frame_information_entries``, runs ``process_compile_unit`` on
    every compile unit, then serializes the module-level ``functions`` list
    (keyed by each entry's ``'address'``) to ``funcs.json`` in the current
    directory.

    Raises:
        AssertionError: if the file has no DWARF info.
        KeyError: if an entry of ``functions`` lacks
            ``'dw_op_call_frame_cfa'`` or ``'address'``.
    """
    global dwarf_info
    global location_lists
    global call_frame_information_entries

    with open(path_to_sample, 'rb') as f:
        pyelftools_elf_file = ELFFile(f)
        assert pyelftools_elf_file.has_dwarf_info(), 'file has no DWARF info'

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarf_info = pyelftools_elf_file.get_dwarf_info()

        call_frame_information_entries = dwarf_info.CFI_entries()

        location_lists = dwarf_info.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(pyelftools_elf_file.get_machine_arch())

        # DWARFInfo allows to iterate over the compile units contained in
        # the .debug_info section. CU is a CompileUnit object, with some
        # computed attributes (such as its offset in the section) and
        # a header which conforms to the DWARF standard. The access to
        # header elements is, as usual, via item-lookup.
        for compile_unit in dwarf_info.iter_CUs():
            process_compile_unit(dwarf_info, pyelftools_elf_file, compile_unit)

        # Text mode: json.dumps returns str, which a binary-mode file rejects
        # on Python 3.  A distinct name keeps the ELF handle ``f`` unshadowed.
        with open('funcs.json', 'w') as out:
            # Remove dw_op_call_frame_cfa, as it is not serializable.  An
            # explicit loop is required: map() is lazy on Python 3 and would
            # never perform the pops.
            for func in functions:
                func.pop('dw_op_call_frame_cfa')
            # convert into a nice dict
            funcs = {func['address']: func for func in functions}
            out.write(json.dumps(funcs))
Ejemplo n.º 42
0
def main():
    """Build the kiss database from ``source_dir`` and return.

    As written, the function stops at the bare ``return`` right after
    ``db.construct_from_file()``; everything below that statement (snippet
    sampling, forked search tests, statistics and the CSV report) is
    unreachable.

    NOTE(review): the unreachable part reads ``tests_amount``,
    ``SNIPPET_SIZE`` and ``NUM_PROCESSES`` and calls ``chunks`` and
    ``forked_test``, none of which are defined in this function, while the
    locals ``test_amount`` and ``snippet_size`` are never read. Presumably
    the former are module-level names -- confirm before re-enabling the code.
    """
    source_dir = "/mnt/mnt/libcdb/libc"
    snippet_size = 32
    test_amount = 100
    random_seed = 'looks_like_someone_fucked_with_your_RNG'

    db = kiss.DatabaseInstance(source_dir, "result")
    db.generate_database_to_file()
    db.construct_from_file()
    # print("DONE constructing database")
    # Early exit: nothing below this line is executed.
    return

    # Fixed seed so the random snippet selection is repeatable.
    random.seed(random_seed)

    start_time = time.time()
    search_engine = kiss.SearchEngine("result.cdb", "result.ofst")
    database_load_time = time.time() - start_time

    print("AAAAAAAA")
    # Go through files recursively and pick a snippet in their .text section at random
    file_list = []
    data_sizes = []
    for file_name in glob.glob(source_dir + '/**/*', recursive=True):
        if os.path.islink(file_name):
            continue
        try:
            with open(file_name, 'rb') as f:
                elffile = ELFFile(f)
                # Only 64-bit x64 ELF files take part in the test base.
                if (elffile.get_machine_arch() != 'x64'):
                    continue
                if (elffile.elfclass != 64):
                    continue

                data_size = len(elffile.get_section_by_name('.text').data())
                # print(file_name)
                # print("text_offset: " + hex(elffile.get_section_by_name('.text')['sh_offset']))
                # return
                file_list.append([file_name, data_size])
                data_sizes.append(data_size)
        except Exception:
            # Any failure (not an ELF, no .text section, unreadable file, ...)
            # just excludes the file from the test base.
            continue
    print("test base has {} files".format(len(file_list)))

    # Make random selections in the testbase
    test_locations = {}
    for _ in range(0, tests_amount):
        # Choose with relative weights (files weighted by .text size)
        chosen = random.choices(population=file_list, weights=data_sizes)[0]

        position = random.randint(0, chosen[1] - SNIPPET_SIZE)
        if (chosen[0] in test_locations):
            test_locations[chosen[0]].append(position)
        else:
            test_locations[chosen[0]] = [position]

    # Statistics shared with the worker processes through a manager
    mp.set_start_method('fork')
    manager = mp.Manager()
    stats = manager.dict()
    stats["unidentified"] = 0
    stats["no_fun_name"] = 0
    stats["wrong_fun_name"] = 0
    stats["time_sum"] = 0
    stats["wrong_hit_count"] = manager.dict()
    # stats["counter"] = 0

    # fork off tests
    processes_num = min(NUM_PROCESSES, len(test_locations))
    splitted_test_locations = chunks(test_locations,
                                     int(tests_amount / processes_num))

    jobs = []
    for l in splitted_test_locations:
        p = mp.Process(target=forked_test, args=(search_engine, l, stats))
        jobs.append(p)
        p.start()

    # wait for them all to finish
    for proc in jobs:
        proc.join()

    time_avg = stats["time_sum"] / tests_amount
    print("\n\nTested {} snippets of size {} and had".format(
        tests_amount, SNIPPET_SIZE))
    print("\t\t{} succesful identifications".format(tests_amount -
                                                    stats["unidentified"]))
    # print("\t\t{} false positives".format(sum(stats["wrong_hit_count"])))
    # print("\t\t{} average amount of wrong hits per function".format(statistics.mean(stats["wrong_hit_count"])))
    # print("\t\t{} stdev of amount of wrong hits per function".format(statistics.stdev(stats["wrong_hit_count"])))
    print("\t\t{} wrong function names".format(stats["wrong_fun_name"]))
    print("\t\tthe average query time was {} seconds".format(time_avg))
    print("\t\tthe library was loaded in {}".format(database_load_time))

    # convert wrong_hit_count dict to list
    # wrong_hit_count_list = []

    # print(wrong_hit_count_list)
    # Write one CSV row per entry of stats["wrong_hit_count"].
    with open('false_positives.csv', 'w') as f:  # Just use 'w' mode in 3.x
        writer = csv.writer(f, delimiter=',')
        writer.writerow(["false_positives", "amount"])
        for k, v in stats["wrong_hit_count"].items():
            # wrong_hit_count_list.append([k, v])
            writer.writerow([k, v])
Ejemplo n.º 43
0
if __name__ == "__main__":
    # Validate command line args
    try:
        libcoldstart_path = sys.argv[1]
    except IndexError:
        libcoldstart_path = os.path.join(os.getcwd(), "libcoldstart.so")
    try:
        new_path = sys.argv[2]
    except IndexError:
        new_path = os.path.join(os.path.dirname(libcoldstart_path),
                                "libcoldstart-patched.so")

    f = open(libcoldstart_path, "rb")
    # Validate input file
    elf = ELFFile(f)
    arch = elf.get_machine_arch()

    if arch not in ("ARM", "x86", "AArch64"):
        print(
            "[!] ERROR: Unknown architecture in libcoldstart.so, this script only supports ARM and x86!"
        )

    shutil.copyfile(libcoldstart_path, new_path)

    patched = False
    patcher13 = TLS13Patcher(f, elf, arch, new_path)
    if patcher13.find_error_strings():
        print("[+] Patching TLS1.3 stack!")
        patcher13.patch()
        patched = True
    else:
Ejemplo n.º 44
0
class ELF:
    """Read-only view of an ELF file: symbols, sections and raw bytes.

    Symbols discovered by ``load_static_sym`` / ``load_dyn_sym`` are stored
    on the ``classbinary`` object passed to the constructor (its ``symbols``
    and ``reverse_symbols`` mappings).
    """

    def __init__(self, classbinary, filename):
        """Open *filename* and parse it with pyelftools.

        NOTE(review): the file object is intentionally not closed here;
        ``section_stream_read`` reads from the section streams later.
        """
        import capstone as CAPSTONE

        fd = open(filename, "rb")
        self.elf = ELFFile(fd)
        self.classbinary = classbinary
        self.__data_sections = []
        self.__data_sections_content = []
        self.__exec_sections = []

        self.arch_lookup = {
            "x86": CAPSTONE.CS_ARCH_X86,
            "x64": CAPSTONE.CS_ARCH_X86,
            "ARM": CAPSTONE.CS_ARCH_ARM,
        }

        # Capstone *mode* constants; the ARM entry must be a CS_MODE_* value,
        # not the CS_ARCH_ARM architecture constant.
        self.arch_mode_lookup = {
            "x86": CAPSTONE.CS_MODE_32,
            "x64": CAPSTONE.CS_MODE_64,
            "ARM": CAPSTONE.CS_MODE_ARM,
        }


    def load_static_sym(self):
        """Register every named, non-zero-valued ``.symtab`` symbol."""
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return
        for sy in symtab.iter_symbols():
            if sy.entry.st_value != 0 and sy.name != b"":
                self.classbinary.reverse_symbols[sy.entry.st_value] = sy.name.decode()
                self.classbinary.symbols[sy.name.decode()] = sy.entry.st_value


    def load_dyn_sym(self):
        """Register ``name@plt`` symbols for the PLT entries.

        Walks the PLT in fixed-size steps and pairs the k-th slot with the
        k-th PLT relocation.  Does nothing when the relocation section,
        ``.dynsym`` or ``.plt`` is missing.  Raises ``KeyError`` for an
        architecture absent from the size tables below.
        """
        rel = (self.elf.get_section_by_name(b".rela.plt") or
                self.elf.get_section_by_name(b".rel.plt"))
        dyn = self.elf.get_section_by_name(b".dynsym")

        if rel is None or dyn is None:
            return

        # TODO : are constants ?
        PLT_SIZE = {
            "x86": 16,
            "x64": 16,
            "ARM": 12,
        }

        PLT_FIRST_ENTRY_OFF = {
            "x86": 16,
            "x64": 16,
            "ARM": 20,
        }

        arch = self.elf.get_machine_arch()

        relitems = list(rel.iter_relocations())
        dynsym = list(dyn.iter_symbols())

        plt = self.elf.get_section_by_name(b".plt")
        if plt is None:
            # Relocations without a .plt section: nothing to label.
            return
        plt_entry_size = PLT_SIZE[arch]

        off = plt.header.sh_addr + PLT_FIRST_ENTRY_OFF[arch]
        plt_end = plt.header.sh_addr + plt.header.sh_size

        # Stop at whichever runs out first: PLT slots or relocations.  The
        # PLT may hold more slots than there are relocation entries.
        for reloc in relitems:
            if off >= plt_end:
                break
            idx = reloc.entry.r_info_sym
            name = dynsym[idx].name.decode()
            self.classbinary.reverse_symbols[off] = name + "@plt"
            self.classbinary.symbols[name + "@plt"] = off
            off += plt_entry_size


    def load_data_sections(self):
        """Cache every data section together with its raw content."""
        for s in self.elf.iter_sections():
            if self.__section_is_data(s):
                self.__data_sections.append(s)
                self.__data_sections_content.append(s.data())


    def __get_data_section_idx(self, addr):
        """Return the index of the cached data section holding *addr*, or -1."""
        for i, s in enumerate(self.__data_sections):
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return i
        return -1


    def __section_is_data(self, s):
        """Truthy when *s* has SHF_WRITE or SHF_ALLOC set and is not executable."""
        mask = SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
        return s.header.sh_flags & mask and not self.__section_is_exec(s)


    def is_address(self, imm):
        """Return ``(section_name, is_data)`` for the section containing *imm*.

        Sections with a zero start address are ignored.  Returns
        ``(None, False)`` when no section contains the value.
        """
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            if start == 0:
                continue
            end = start + s.header.sh_size
            if  start <= imm < end:
                return s.name.decode(), self.__section_is_data(s)
        return None, False


    def __get_cached_exec_section(self, addr):
        """Return the cached section containing *addr*, or ``None``."""
        for s in self.__exec_sections:
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return s
        return None


    def __find_section(self, addr):
        """Scan all sections of the file for the one containing *addr*."""
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if  start <= addr < end:
                return s
        return None


    def __get_section(self, addr):
        """Return the section containing *addr*, consulting the cache first.

        A section found by the full scan is appended to the cache, whatever
        its flags are.
        """
        s = self.__get_cached_exec_section(addr)
        if s is not None:
            return s
        s = self.__find_section(addr)
        if s is None:
            return None
        self.__exec_sections.append(s)
        return s


    def check_addr(self, addr):
        """Return ``(is_mapped, is_executable)`` for *addr*.

        ``(False, False)`` is returned when no section contains the address;
        the executable flag cannot be queried on a missing section.
        """
        s = self.__get_section(addr)
        if s is None:
            return (False, False)
        return (True, self.__section_is_exec(s))


    def get_section_start(self, addr):
        """Return the start address of the section holding *addr*, or 0."""
        s = self.__get_section(addr)
        if s is None:
            return 0
        return s.header.sh_addr


    def section_stream_read(self, addr, size):
        """Read *size* bytes located at virtual address *addr*.

        NOTE(review): no guard for an address outside every section; in that
        case the attribute access on ``None`` raises ``AttributeError``.
        """
        s = self.__get_section(addr)
        off = addr - s.header.sh_addr
        s.stream.seek(s.header.sh_offset + off)
        return s.stream.read(size)


    def __section_is_exec(self, s):
        """Return the SHF_EXECINSTR bit of the section flags (0 if clear)."""
        return s.header.sh_flags & SH_FLAGS.SHF_EXECINSTR


    def get_string(self, addr, max_data_size):
        """Return the quoted string stored at *addr* in a cached data section.

        At most *max_data_size* characters are rendered; ``...`` is appended
        when reading stopped before a terminating zero byte and before the
        end of the section.  Returns ``""`` (empty, unquoted) when *addr* is
        not inside any cached data section.
        """
        i = self.__get_data_section_idx(addr)
        if i == -1:
            return ""

        s = self.__data_sections[i]
        data = self.__data_sections_content[i]
        off = addr - s.header.sh_addr
        txt = ['"']

        # Initialised so the truncation check below is well defined even when
        # the loop body never runs (e.g. max_data_size <= 0).
        c = None
        i = 0
        while i < max_data_size and \
              off < s.header.sh_size:
            c = data[off]
            if c == 0:
                break
            txt.append(lib.utils.get_char(c))
            off += 1
            i += 1

        if c != 0 and off != s.header.sh_size:
            txt.append("...")

        return ''.join(txt) + '"'


    def get_arch(self):
        """Return ``(capstone_arch, capstone_mode)``; ``None`` for unknown archs."""
        return self.arch_lookup.get(self.elf.get_machine_arch(), None), \
               self.arch_mode_lookup.get(self.elf.get_machine_arch(), None)


    def get_arch_string(self):
        """Return the architecture name reported by pyelftools."""
        return self.elf.get_machine_arch()


    def get_entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        return self.elf.header['e_entry']


    def iter_sections(self):
        """Yield ``(name, start, end)`` for every section with a non-empty name."""
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if s.name != b"":
                yield (s.name.decode(), start, end)
Ejemplo n.º 45
0
class ELFExecutable(BaseExecutable):
    def __init__(self, file_path):
        """Parse the ELF at `file_path` and cache the pieces used later.

        Sets the pyelftools helper, the detected architecture, struct
        packing hints, the converted section list, the first executable
        PT_LOAD segment and (when a ``.dynamic`` section exists) the list of
        needed libraries.

        Raises Exception when the machine architecture is not recognized.
        """
        super(ELFExecutable, self).__init__(file_path)

        self.helper = ELFFile(self.binary)

        detected = self._identify_arch()
        self.architecture = detected
        if detected is None:
            raise Exception('Architecture is not recognized')

        logging.debug('Initialized {} {} with file \'{}\''.format(self.architecture, type(self).__name__, file_path))

        # struct format pieces: byte order prefix and pointer-sized integer code
        if self.helper.little_endian:
            self.pack_endianness = '<'
        else:
            self.pack_endianness = '>'
        if self.helper.elfclass == 32:
            self.address_pack_type = 'I'
        else:
            self.address_pack_type = 'Q'

        self.sections = [section_from_elf_section(sec) for sec in self.helper.iter_sections()]

        # First loadable segment that has the execute bit (0x1) set.
        exec_loads = [seg for seg in self.helper.iter_segments()
                      if seg['p_type'] == 'PT_LOAD' and seg['p_flags'] & 0x1]
        self.executable_segment = exec_loads[0]

        dynamic = self.helper.get_section_by_name('.dynamic')
        if dynamic:
            self.libraries = [tag.needed for tag in dynamic.iter_tags() if tag['d_tag'] == 'DT_NEEDED']

        # Filled in by prepare_for_injection()
        self.next_injection_offset = None
        self.next_injection_vaddr = None

    def _identify_arch(self):
        """Map the pyelftools machine name to an ARCHITECTURE constant (None if unknown)."""
        known = {
            'x86': ARCHITECTURE.X86,
            'x64': ARCHITECTURE.X86_64,
            'ARM': ARCHITECTURE.ARM,
            'AArch64': ARCHITECTURE.ARM_64,
        }
        return known.get(self.helper.get_machine_arch(), None)

    def entry_point(self):
        """Return the ELF header's ``e_entry`` value."""
        entry = self.helper['e_entry']
        return entry

    def executable_segment_vaddr(self):
        """Return ``p_vaddr`` of the cached executable segment."""
        segment = self.executable_segment
        return segment['p_vaddr']

    def executable_segment_size(self):
        """Return ``p_memsz`` of the cached executable segment."""
        # TODO: Maybe limit this because we use this as part of our injection method?
        segment = self.executable_segment
        return segment['p_memsz']

    def iter_string_sections(self):
        """Yield the cached sections named ``.rodata``, ``.data`` or ``.bss``."""
        wanted = ('.rodata', '.data', '.bss')
        for section in self.sections:
            if section.name not in wanted:
                continue
            yield section

    def _extract_symbol_table(self):
        """Populate ``self.functions`` from the PLT relocations and ``.symtab``.

        Two passes are made:

        1. Every relocation in ``.rela.plt`` / ``.rel.plt`` becomes a
           ``<name>@PLT`` function placed in ``.plt``.
        2. Every defined STT_FUNC symbol of ``.symtab`` that lies inside a
           section of the executable segment becomes a function. Symbols
           with ``st_size == 0`` get a size computed from the distance to
           the next known function (or to the end of the section).
        """
        # Add in symbols from the PLT/rela.plt
        # .rela.plt contains indexes to reference both .dynsym (symbol names) and .plt (jumps to GOT)
        if self.is_64_bit():
            reloc_section = self.helper.get_section_by_name('.rela.plt')
        else:
            reloc_section = self.helper.get_section_by_name('.rel.plt')

        if reloc_section:
            dynsym = self.helper.get_section(reloc_section['sh_link']) # .dynsym
            if isinstance(dynsym, SymbolTableSection):
                plt = self.helper.get_section_by_name('.plt')
                if plt is None:
                    # Relocations without a .plt section: there are no PLT
                    # stubs to label, and indexing None would crash.
                    logging.debug('No .plt section found. Not adding PLT functions...')
                else:
                    # While sh_entsize is sometimes defined, it appears to be incorrect in some cases so we just
                    # ignore that and calculate it based off of the total size / num_relocations (plus the
                    # trampoline entity). Floor division keeps the result an integer on Python 3 as well, which
                    # hex() below requires. The value does not depend on the relocation, so compute it once.
                    entsize = plt['sh_size'] // (reloc_section.num_relocations() + 1)

                    for idx, reloc in enumerate(reloc_section.iter_relocations()):
                        # Get the symbol's name from dynsym
                        symbol_name = dynsym.get_symbol(reloc['r_info_sym']).name

                        # The address of this function in the PLT is the base PLT offset + the index of the
                        # relocation. However, since there is the extra "trampoline" entity at the top of the PLT,
                        # we need to add one to the index to account for it.
                        plt_addr = plt['sh_addr'] + ((idx+1) * entsize)

                        logging.debug('Directly adding PLT function {} at vaddr {}'.format(symbol_name, hex(plt_addr)))

                        f = Function(plt_addr,
                                     entsize,
                                     symbol_name + '@PLT',
                                     self,
                                     type=Function.DYNAMIC_FUNC)
                        self.functions[plt_addr] = f
            else:
                logging.debug('Relocation section had sh_link to {}. Not parsing symbols...'.format(dynsym))

        # Some things in the symtab have st_size = 0 which confuses analysis later on. To solve this, we keep track of
        # where each address is in the `function_vaddrs` set and go back after all symbols have been iterated to compute
        # size by taking the difference between the current address and the next recorded address.

        # We do this for each executable section so that the produced functions cannot span multiple sections.

        # The symbol table is the same for every section, so collect the
        # defined function symbols once instead of re-reading it per section.
        func_symbols = []
        symbol_table = self.helper.get_section_by_name('.symtab')
        if symbol_table:
            func_symbols = [symbol for symbol in symbol_table.iter_symbols()
                            if symbol['st_info']['type'] == 'STT_FUNC' and symbol['st_shndx'] != 'SHN_UNDEF']

        for section in self.helper.iter_sections():
            if not self.executable_segment.section_in_segment(section):
                continue

            section_start = section['sh_addr']
            section_end = section_start + section['sh_size']

            name_for_addr = {}
            # Seed with the section end so the last function has a "next" address.
            function_vaddrs = {section_end}

            for symbol in func_symbols:
                vaddr = symbol['st_value']
                if not section_start <= vaddr < section_end:
                    continue

                name_for_addr[vaddr] = symbol.name
                function_vaddrs.add(vaddr)

                if symbol['st_size']:
                    logging.debug('Eagerly adding function {} from .symtab at vaddr {} with size {}'
                                  .format(symbol.name, hex(vaddr), hex(symbol['st_size'])))
                    f = Function(vaddr,
                                 symbol['st_size'],
                                 symbol.name,
                                 self)
                    self.functions[vaddr] = f

            ordered_vaddrs = sorted(function_vaddrs)

            for cur_addr, next_addr in zip(ordered_vaddrs[:-1], ordered_vaddrs[1:]):
                # If st_size was set, we already added the function above, so don't add it again.
                if cur_addr in self.functions:
                    continue

                func_name = name_for_addr[cur_addr]
                size = next_addr - cur_addr
                logging.debug('Lazily adding function {} from .symtab at vaddr {} with size {}'
                              .format(func_name, hex(cur_addr), hex(size)))
                # NOTE(review): these .symtab functions are tagged DYNAMIC_FUNC like the
                # PLT stubs above; kept as in the original - confirm this is intended.
                f = Function(cur_addr,
                             size,
                             func_name,
                             self,
                             type=Function.DYNAMIC_FUNC)
                self.functions[cur_addr] = f

        # TODO: Automatically find and label main from call to libc_start_main

    def prepare_for_injection(self):
        """
        Grow the executable LOAD segment by INJECTION_SIZE bytes of padding.

        Derived from http://vxheavens.com/lib/vsc01.html

        Works on a copy of the binary: the ELF header, the program headers
        and the section headers are rewritten in the copy, then
        INJECTION_SIZE bytes of '\\xCC' are spliced in right after the last
        section of the executable segment. On success ``self.binary`` and
        ``self.helper`` are replaced and ``self.next_injection_offset`` /
        ``self.next_injection_vaddr`` point at the start of the padding.

        Returns True on success, False when the file has no executable
        PT_LOAD segment (in which case ``self.binary`` is left untouched).
        """
        modified = StringIO(self.binary.getvalue())

        # Add INJECTION_SIZE to the section header list offset to make room for our injected code
        elf_hdr = self.helper.header.copy()
        elf_hdr.e_shoff += INJECTION_SIZE
        logging.debug('Changing e_shoff to {}'.format(elf_hdr.e_shoff))

        modified.seek(0)
        modified.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Find the main RX LOAD segment and also adjust other segment offsets along the way
        executable_segment = None

        for segment_idx, segment in enumerate(self.helper.iter_segments()):
            segment_hdr = segment.header.copy()
            segment_hdr_offset = self.helper._segment_offset(segment_idx)

            if executable_segment is not None:
                # Already past the executable segment, so just update the offset if needed (i.e. don't update things
                # that come before the expanded section)
                # last_exec_section is always bound here: it is assigned in the same branch that sets
                # executable_segment below.
                if segment_hdr.p_offset > last_exec_section['sh_offset']:
                    segment_hdr.p_offset += INJECTION_SIZE

            elif segment['p_type'] == 'PT_LOAD' and segment['p_flags'] & P_FLAGS.PF_X:
                # Found the executable LOAD segment.
                # Make room for our injected code.

                logging.debug('Found executable LOAD segment at index {}'.format(segment_idx))
                executable_segment = segment

                # Highest section index that lies inside the executable segment; the padding goes after it.
                last_exec_section_idx = max([idx for idx in range(self.helper.num_sections()) if
                                             executable_segment.section_in_segment(self.helper.get_section(idx))])
                last_exec_section = self.helper.get_section(last_exec_section_idx)

                segment_hdr.p_filesz += INJECTION_SIZE
                segment_hdr.p_memsz += INJECTION_SIZE

                logging.debug('Rewriting segment filesize and memsize to {} and {}'.format(
                    segment_hdr.p_filesz, segment_hdr.p_memsz)
                )

            # Every program header is written back, changed or not.
            modified.seek(segment_hdr_offset)
            modified.write(self.helper.structs.Elf_Phdr.build(segment_hdr))

        if executable_segment is None:
            logging.error("Could not locate an executable LOAD segment. Cannot continue injection.")
            return False

        logging.debug('Last section in executable LOAD segment is at index {} ({})'.format(last_exec_section_idx,
                                                                                           last_exec_section.name))

        # The padding starts right after the current end of that section (file offset and virtual address).
        self.next_injection_offset = last_exec_section['sh_offset'] + last_exec_section['sh_size']
        self.next_injection_vaddr = last_exec_section['sh_addr'] + last_exec_section['sh_size']

        # Update sh_size for the section we grew
        section_header_offset = self.helper._section_offset(last_exec_section_idx)
        section_header = last_exec_section.header.copy()

        section_header.sh_size += INJECTION_SIZE

        modified.seek(section_header_offset)
        modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # Update sh_offset for each section past the last section in the executable segment
        # NOTE(review): "past" is decided by section index, which presumably matches file order - confirm.
        for section_idx in range(last_exec_section_idx + 1, self.helper.num_sections()):
            section_header_offset = self.helper._section_offset(section_idx)
            section_header = self.helper.get_section(section_idx).header.copy()

            section_header.sh_offset += INJECTION_SIZE
            logging.debug('Rewriting section {}\'s offset to {}'.format(section_idx, section_header.sh_offset))

            modified.seek(section_header_offset)
            modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # TODO: Architecture-specific padding
        # Should be something that won't immediately crash, but can be caught (e.g. SIGTRAP on x86)
        # Headers were rewritten at their old positions above; inserting the padding here shifts everything after
        # next_injection_offset by INJECTION_SIZE.
        modified = StringIO(modified.getvalue()[:self.next_injection_offset] +
                            '\xCC'*INJECTION_SIZE +
                            modified.getvalue()[self.next_injection_offset:])

        # Commit the new image and re-parse it so the helper reflects the rewritten headers.
        self.binary = modified
        self.helper = ELFFile(self.binary)

        return True

    def inject(self, asm, update_entry=False):
        """Write `asm` into the padding of the executable segment.

        If prepare_for_injection() has not been run yet, or the remaining
        padding is too small for `asm`, the padding is (re)created first.

        :param asm: the bytes to write at the current injection offset.
        :param update_entry: when True, ``e_entry`` is rewritten to the
            virtual address the bytes were written at.
        :return: the virtual address at which `asm` was placed.
        :raises Exception: when the binary has no executable PT_LOAD
            segment, so there is nowhere to inject.
        """
        if self.next_injection_offset is None or self.next_injection_vaddr is None:
            logging.warning(
                'prepare_for_injection() was not called before inject(). This may cause unexpected behavior')
            # prepare_for_injection() reports failure through its return
            # value; carrying on would only fail later with a confusing error.
            if not self.prepare_for_injection():
                raise Exception('Could not prepare the binary for injection')

        injection_section_idx = None
        for segment in self.helper.iter_segments():
            if segment['p_type'] == 'PT_LOAD' and segment['p_flags'] & P_FLAGS.PF_X:
                injection_section_idx = max(i for i in range(self.helper.num_sections()) if segment.section_in_segment(self.helper.get_section(i)))
                break

        if injection_section_idx is None:
            # Previously this surfaced as an unbound-variable error below.
            raise Exception('Could not locate an executable LOAD segment to inject into')

        injection_section = self.helper.get_section(injection_section_idx)

        # If we haven't injected code before or need to expand the section again for this injection, go ahead and
        # shift stuff around.
        if injection_section['sh_size'] < INJECTION_SIZE or \
                        injection_section['sh_offset'] + injection_section['sh_size'] < self.next_injection_offset + len(asm):
            logging.debug('Automatically expanding injection section to accommodate for assembly')

            # NOTE: Could this change the destination address for the code that gets injected?
            self.prepare_for_injection()
        elif self.next_injection_offset == 0:
            # Resume after whatever non-padding bytes the section already holds.
            used_code_len = len(injection_section.data().rstrip('\xCC'))
            self.next_injection_offset = injection_section['sh_offset'] + used_code_len
            self.next_injection_vaddr = injection_section['sh_addr'] + used_code_len

        # "Inject" the assembly
        logging.debug('Injecting {} bytes of assembly at offset {}'.format(len(asm), self.next_injection_offset))
        self.binary.seek(self.next_injection_offset)
        self.binary.write(asm)

        # Update e_entry if requested
        if update_entry:
            logging.debug('Rewriting ELF entry address to {}'.format(self.next_injection_vaddr))
            elf_hdr = self.helper.header
            elf_hdr.e_entry = self.next_injection_vaddr

            self.binary.seek(0)
            self.binary.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Re-parse so the helper sees the bytes just written.
        self.helper = ELFFile(self.binary)

        # Advance the cursor for the next injection, but return where this one went.
        self.next_injection_vaddr += len(asm)
        self.next_injection_offset += len(asm)

        return self.next_injection_vaddr - len(asm)
Ejemplo n.º 46
0
class ELF(object):
    def __init__(self, elf, name=''):
        """
        Wrap an ELF image. 'elf' is either a path (a str, opened here in
        binary mode) or an already open stream of ELF data. 'name' is only
        used when generating CapDL from the ELF file.
        """
        stream = open(elf, 'rb') if isinstance(elf, str) else elf
        self._elf = ELFFile(stream)
        self.name = name
        # Cache of symbols seen so far, keyed by symbol name.
        self.symtab = {}

    def get_entry_point(self):
        return self._elf['e_entry']

    def _get_symbol(self, symbol):
        if symbol in self.symtab:
            return self.symtab[symbol]

        table = self._elf.get_section_by_name('.symtab')
        if not table:
            # This ELF file has been stripped.
            raise Exception('No symbol table available')

        for s in table.iter_symbols():
            self.symtab[s.name] = s
            if s.name == symbol:
                return s
        return None

    def get_symbol_vaddr(self, symbol):
        sym = self._get_symbol(symbol)
        if sym:
            return sym['st_value']
        return None

    def get_symbol_size(self, symbol):
        sym = self._get_symbol(symbol)
        if sym:
            return sym['st_size']
        return None

    def _safe_name(self):
        """
        Replace characters that the CapDL tools parse differently.
        """
        return re.sub(r'[^A-Za-z0-9]', '_', self.name)

    def get_arch(self):
        return self._elf.get_machine_arch()

    def get_pages(self, infer_asid=True, pd=None):
        """
        Return a PageCollection holding the pages required to ELF load this
        file. One page is added for every PAGE_SIZE step between the
        rounded-down start and the end of each non-empty PT_LOAD segment,
        together with booleans for the segment's read, write and execute
        permissions.
        """
        pages = PageCollection(self._safe_name(), self.get_arch(), infer_asid, pd)
        for seg in self._elf.iter_segments():
            if not seg['p_type'] == 'PT_LOAD':
                continue
            if seg['p_memsz'] == 0:
                continue
            seg_start = int(seg['p_vaddr'])
            seg_end = seg_start + int(seg['p_memsz'])
            r = (seg['p_flags'] & P_FLAGS.PF_R) > 0
            w = (seg['p_flags'] & P_FLAGS.PF_W) > 0
            x = (seg['p_flags'] & P_FLAGS.PF_X) > 0
            # An explicit loop: map() used only for its side effects is lazy
            # on Python 3 and would never add a page there.
            for page_vaddr in range(round_down(seg_start), seg_end, PAGE_SIZE):
                pages.add_page(page_vaddr, r, w, x)
        return pages

    def get_spec(self, infer_tcb=True, infer_asid=True, pd=None):
        """
        Build a CapDL spec from what this ELF file alone can tell us: the
        spec of its pages and, when 'infer_tcb' is set, a single TCB whose
        instruction pointer is the entry point.
        """
        pages = self.get_pages(infer_asid, pd)
        spec = pages.get_spec()

        if not infer_tcb:
            return spec

        # Create a single TCB.
        tcb_name = 'tcb_%s' % self._safe_name()
        tcb = TCB(tcb_name, ip=self.get_entry_point(), elf=self.name)
        spec.add_object(tcb)
        tcb['vspace'] = pages.get_page_directory()[1]
        return spec

    def __repr__(self):
        return str(self._elf)
Ejemplo n.º 47
0
class ELF:
    def __init__(self, classbinary, filename):
        """Open `filename` as an ELF file and attach it to `classbinary`.

        `classbinary` is the object whose ``symbols`` / ``reverse_symbols``
        tables the loaders of this class fill in.
        """
        self.classbinary = classbinary
        self.elf = ELFFile(open(filename, "rb"))
        # Data sections and their raw contents, kept at matching indexes.
        self.__data_sections = []
        self.__data_sections_data = []
        # pyelftools machine name -> architecture constant
        self.arch_lookup = {
          "x86": lib.fileformat.binary.ARCH_x86,
          "x64": lib.fileformat.binary.ARCH_x64
        }


    def load_static_sym(self):
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return
        for sy in symtab.iter_symbols():
            if sy.entry.st_value != 0 and sy.name != b"":
                self.classbinary.reverse_symbols[sy.entry.st_value] = sy.name.decode()
                self.classbinary.symbols[sy.name.decode()] = sy.entry.st_value
            # print("%x\t%s" % (sy.entry.st_value, sy.name.decode()))


    def load_dyn_sym(self):
        rel = (self.elf.get_section_by_name(b".rela.plt") or
                self.elf.get_section_by_name(b".rel.plt"))
        dyn = self.elf.get_section_by_name(b".dynsym")

        relitems = list(rel.iter_relocations())
        dynsym = list(dyn.iter_symbols())

        plt = self.elf.get_section_by_name(b".plt") 
        plt_entry_size = 16 # TODO

        off = plt.header.sh_addr + plt_entry_size
        k = 0

        while off < plt.header.sh_addr + plt.header.sh_size :
            idx = relitems[k].entry.r_info_sym
            name = dynsym[idx].name.decode()
            self.classbinary.reverse_symbols[off] = name + "@plt"
            self.classbinary.symbols[name + "@plt"] = off
            off += plt_entry_size
            k += 1


    def load_data_sections(self):
        for s in self.elf.iter_sections():
            if self.__section_is_data(s):
                self.__data_sections.append(s)
                self.__data_sections_data.append(s.data())


    def __get_data_section_idx(self, addr):
        for i, s in enumerate(self.__data_sections):
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if start <= addr < end:
                return i
        return -1


    def __section_is_data(self, s):
        """Truthy when the section has SHF_WRITE or SHF_ALLOC set and is not executable."""
        wanted = s.header.sh_flags & (SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC)
        if not wanted:
            # Same falsy value the original `and` expression produced.
            return wanted
        return not self.__section_is_exec(s)


    def is_address(self, imm):
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            if start == 0:
                continue
            end = start + s.header.sh_size
            if  start <= imm < end:
                return s.name.decode(), self.__section_is_data(s)
        return None, False


    def __find_section(self, addr):
        for s in self.elf.iter_sections():
            start = s.header.sh_addr
            end = start + s.header.sh_size
            if  start <= addr < end:
                return s
        return None


    def get_section(self, addr):
        s = self.__find_section(addr)
        flags = {
            "exec": self.__section_is_exec(s)
        }
        return (s.data(), s.header.sh_addr, flags)


    def __section_is_exec(self, s):
        """Return the SHF_EXECINSTR bit of the section flags (non-zero when set)."""
        flags = s.header.sh_flags
        return flags & SH_FLAGS.SHF_EXECINSTR


    def get_string(self, addr):
        i = self.__get_data_section_idx(addr)
        if i == -1:
            return ""

        s = self.__data_sections[i]
        data = self.__data_sections_data[i]
        off = addr - s.header.sh_addr
        txt = ['"']

        i = 0
        while i < lib.fileformat.binary.MAX_STRING_DATA and \
              off < s.header.sh_size:
            c = data[off]
            if c == 0:
                break
            txt.append(lib.utils.get_char(c))
            off += 1
            i += 1

        if c != 0 and off != s.header.sh_size:
            txt.append("...")

        return ''.join(txt) + '"'


    def get_arch(self):
        """Return the architecture constant for this file, or ARCH_INVALID if unknown."""
        machine = self.elf.get_machine_arch()
        fallback = lib.fileformat.binary.ARCH_INVALID
        return self.arch_lookup.get(machine, fallback)


    def get_entry_point(self):
        return self.elf.header['e_entry']
Ejemplo n.º 48
0
class ELF(Binary):
    def __init__(self, db, filename):
        """Open `filename` as an ELF image and register its sections and segments.

        Sections with a name and at least one of the low four flag bits set
        are passed to ``add_section``. Every PT_LOAD segment is stored in
        ``_abs_segments``, and the PT_DYNAMIC segment (if any) is remembered
        for load_dyn_sym(). When the file yields no sections, the segments
        are used as sections.
        """
        Binary.__init__(self)

        stream = open(filename, "rb")
        self.elf = ELFFile(stream)
        self.db = db

        self.__parsed_reloc_tables = set()
        self.dtags = {}
        self.jmprel = []
        self.dynamic_seg = None

        self.set_arch_name()

        # Names for the MIPS-specific dynamic tag numbers. MIPS32 and MIPS64
        # share all entries except DT_MIPS_PLTGOT, which is MIPS32 only.
        if self.arch in ("MIPS32", "MIPS64"):
            mips_tags = {
                0x70000001: "DT_MIPS_RLD_VERSION",
                0x70000005: "DT_MIPS_FLAGS",
                0x70000006: "DT_MIPS_BASE_ADDRESS",
                0x7000000a: "DT_MIPS_LOCAL_GOTNO",
                0x70000011: "DT_MIPS_SYMTABNO",
                0x70000012: "DT_MIPS_UNREFEXTNO",
                0x70000013: "DT_MIPS_GOTSYM",
                0x70000016: "DT_MIPS_RLD_MAP"
            }
            if self.arch == "MIPS32":
                mips_tags[0x70000032] = "DT_MIPS_PLTGOT"
            self.dynamic_tag_translation = mips_tags
        else:
            self.dynamic_tag_translation = {}

        # Sections with no address of their own are placed one after the
        # other, starting at 0.
        next_free = 0

        # Load sections
        for sec in self.elf.iter_sections():
            if not sec.name:
                continue

            # Keep only sections R|W|X
            # TODO : is it sufficient ?
            if sec.header.sh_flags & 0xf == 0:
                continue

            base = sec.header.sh_addr
            if base == 0:
                base = next_free
                next_free += sec.header.sh_size

            content = sec.data()

            self.add_section(
                base,
                sec.name.decode(),
                sec.header.sh_size,
                len(content),
                self.__section_is_exec(sec),
                self.__section_is_data(sec),
                content)

        # Load segments
        suffix = 1
        used_names = set()
        for seg in self.elf.iter_segments():
            seg_type = seg.header.p_type
            if seg_type == "PT_DYNAMIC":
                self.dynamic_seg = seg

            if seg_type != "PT_LOAD":
                continue

            # The first PT_LOAD keeps the bare type as its name; later ones
            # get a numeric suffix.
            label = seg_type
            if label in used_names:
                label += "_%d" % suffix
                suffix += 1
            used_names.add(label)

            base = seg.header.p_vaddr
            bisect.insort_left(self._sorted_segments, base)

            is_data = self.__segment_is_data(seg)
            is_exec = self.__segment_is_exec(seg)
            content = seg.data()

            self._abs_segments[base] = SegmentAbs(
                    label,
                    base,
                    seg.header.p_memsz,
                    len(content),
                    is_exec,
                    is_data,
                    content,
                    seg.header.p_offset,
                    not self.elf.little_endian)

        # No section headers, we add segments in sections
        if len(self._abs_sections) == 0:
            self._abs_sections = self._abs_segments
            self._sorted_sections = self._sorted_segments


    def read_addr_at(self, ad):
        """Read one pointer-sized value at `ad`: a dword when wordsize is 4, else a qword."""
        seg = self.get_segment(ad)
        reader = seg.read_dword if self.wordsize == 4 else seg.read_qword
        return reader(ad)


    def __translate_dynamic_tag(self, tag):
        """Return the DT_* name for a dynamic tag.

        Non-integer tags are returned unchanged. Integer tags are looked up
        in ``self.dynamic_tag_translation``; an integer without a
        translation is returned as is instead of raising KeyError, so one
        unrecognised tag does not abort the whole dynamic-section parse.
        """
        if isinstance(tag, int):
            return self.dynamic_tag_translation.get(tag, tag)
        return tag


    def __get_offset(self, ad):
        """Translate the virtual address `ad` to an offset in the file."""
        seg = self.get_segment(ad)
        # Same sum as before, evaluated left to right.
        return (seg.file_offset + ad) - seg.start


    def load_dyn_sym(self):
        """Load dynamic symbols using only the PT_DYNAMIC segment.

        Builds ``self.dtags`` from the dynamic tags, then constructs string,
        symbol and relocation tables from the DT_* addresses (rather than
        from section headers), registers the relocations and finally
        resolves the PLT entries. Returns early, without error, when the
        dynamic segment or a required tag (DT_STRTAB, DT_SYMTAB/DT_SYMENT, a
        relocation table tag) is missing.

        Raises ExcElf when DT_PLTREL is present but is neither 7 nor 17.
        """
        if self.dynamic_seg is None:
            return

        self.dtags = {}

        for tag in self.dynamic_seg.iter_tags():
            # Create a dictionary, mapping DT_* strings to their values
            tagstr = self.__translate_dynamic_tag(tag.entry.d_tag)
            self.dtags[tagstr] = tag.entry.d_val

        # None of the following things make sense without a string table
        if "DT_STRTAB" not in self.dtags:
            return

        # To handle binaries without section headers, we need to work around
        # pyelftools' assumptions and make our own string table
        fakestrtabheader = {
            "sh_offset": self.__get_offset(self.dtags["DT_STRTAB"]),
        }
        strtab = StringTableSection(
                fakestrtabheader, "strtab_plasma", self.elf.stream)

        # ...
        # At this point CLE checked DT_SONAME
        # ...

        # None of the following structures can be used without a symbol table
        if "DT_SYMTAB" not in self.dtags or "DT_SYMENT" not in self.dtags:
            return

        # Construct our own symbol table to work around pyelftools
        # assuming section headers are around
        fakesymtabheader = {
            "sh_offset": self.__get_offset(self.dtags["DT_SYMTAB"]),
            "sh_entsize": self.dtags["DT_SYMENT"],
            "sh_size": 0
        } # bogus size: no iteration allowed
        self.dynsym = SymbolTableSection(
                fakesymtabheader, "symtab_plasma", self.elf.stream,
                self.elf, strtab)

        # mips' relocations are absolutely screwed up, handle some of them here.
        # (must run after self.dynsym is set: __relocate_mips reads it)
        self.__relocate_mips()

        # perform a lot of checks to figure out what kind of relocation
        # tables are around
        rela_type = None
        if "DT_PLTREL" in self.dtags:
            # DT_PLTREL holds the tag number of the PLT relocation table
            # type: 7 is handled as RELA, 17 as REL.
            if self.dtags["DT_PLTREL"] == 7:
                rela_type = "RELA"
                relentsz = self.elf.structs.Elf_Rela.sizeof()
            elif self.dtags["DT_PLTREL"] == 17:
                rela_type = "REL"
                relentsz = self.elf.structs.Elf_Rel.sizeof()
            else:
                raise ExcElf("DT_PLTREL is not REL or RELA?")
        else:
            if "DT_RELA" in self.dtags:
                rela_type = "RELA"
                relentsz = self.elf.structs.Elf_Rela.sizeof()
            elif "DT_REL" in self.dtags:
                rela_type = "REL"
                relentsz = self.elf.structs.Elf_Rel.sizeof()
            else:
                return

        # try to parse relocations out of a table of type DT_REL{,A}
        if "DT_" + rela_type in self.dtags:
            reloffset = self.dtags["DT_" + rela_type]
            relsz = self.dtags["DT_" + rela_type + "SZ"]
            fakerelheader = {
                "sh_offset": self.__get_offset(reloffset),
                "sh_type": "SHT_" + rela_type,
                "sh_entsize": relentsz,
                "sh_size": relsz
            }
            reloc_sec = RelocationSection(
                    fakerelheader, "reloc_plasma",
                    self.elf.stream, self.elf)
            self.__register_relocs(reloc_sec)

        # try to parse relocations out of a table of type DT_JMPREL
        if "DT_JMPREL" in self.dtags:
            jmpreloffset = self.dtags["DT_JMPREL"]
            jmprelsz = self.dtags["DT_PLTRELSZ"]
            fakejmprelheader = {
                "sh_offset": self.__get_offset(jmpreloffset),
                "sh_type": "SHT_" + rela_type,
                "sh_entsize": relentsz,
                "sh_size": jmprelsz
            }
            jmprel_sec = RelocationSection(
                    fakejmprelheader, "jmprel_plasma",
                    self.elf.stream, self.elf)

            # Replaces self.jmprel, including anything __relocate_mips appended.
            self.jmprel = self.__register_relocs(jmprel_sec)

        self.__resolve_plt()


    def __relocate_mips(self):
        """Register the symbols behind the MIPS GOT entries.

        Does nothing unless the DT_MIPS_BASE_ADDRESS tag is present. Local
        GOT slots (from index 2 up) are saved as MipsLocalReloc; the global
        slots that follow are saved as MipsGlobalReloc and also appended to
        ``self.jmprel``.
        """
        tags = self.dtags
        if 'DT_MIPS_BASE_ADDRESS' not in tags:
            return

        # The MIPS GOT is an array of addresses, simple as that.
        local_count = tags['DT_MIPS_LOCAL_GOTNO']    # number of local GOT entries
        first_global_sym = tags['DT_MIPS_GOTSYM']    # index of first symbol w/ GOT entry
        total_syms = tags['DT_MIPS_SYMTABNO']
        got_base = tags['DT_PLTGOT']

        for slot in range(2, local_count):
            sym = self.dynsym.get_symbol(slot)
            entry = MipsLocalReloc(self, sym, got_base + slot * self.wordsize)
            self.__save_symbol(entry, entry.symbol.entry.st_value)

        # Global entries start right after the local ones in the GOT.
        for n, sym_idx in enumerate(range(first_global_sym, total_syms)):
            sym = self.dynsym.get_symbol(sym_idx)
            entry = MipsGlobalReloc(self, sym,
                            got_base + (n + local_count) * self.wordsize)
            self.__save_symbol(entry, entry.symbol.entry.st_value)
            self.jmprel.append(entry)


    def __resolve_plt(self):
        """Save a symbol for each PLT relocation, at the address read from its GOT slot.

        On x86/x64 the address saved is the GOT value minus 6. On ARM,
        AARCH64, MIPS32 and MIPS64 the GOT value is used as is. Other
        architectures are left untouched.
        """
        # For PPC32 and PPC64 the address to save is 'got'

        if self.arch in ('x86', 'x64'):
            # 0x6 is the size of the plt's jmpq instruction in x86_64
            back_off = 6
        elif self.arch in ('ARM', 'AARCH64', 'MIPS32', 'MIPS64'):
            back_off = 0
        else:
            return

        for rel in self.jmprel:
            target = self.read_addr_at(rel.addr) - back_off
            self.__save_symbol(rel, target)


    def __save_symbol(self, rel, ad):
        """Record the symbol of relocation `rel` at address `ad`.

        Address 0 is ignored. A name that is already taken is passed
        through ``rename_sym`` first. Imports and functions are flagged in
        ``self.imports`` and ``self.db.functions``.
        """
        if ad == 0:
            return

        label = rel.symbol.name.decode()
        if label in self.symbols:
            label = self.rename_sym(label)

        if rel.is_import:
            self.imports[ad] = True

        if self.is_function(rel.symbol):
            self.db.functions[ad] = None

        self.reverse_symbols[ad] = label
        self.symbols[label] = ad


    def __register_relocs(self, section):
        """Parse a relocation table once and save the symbols it refers to.

        Each table is identified by its ``sh_offset``. A table that was
        already parsed is skipped and an empty list is returned, so callers
        can always iterate the result.

        Returns the list of relocation objects created from `section`.
        """
        if section.header["sh_offset"] in self.__parsed_reloc_tables:
            # Must be a list, not None: load_dyn_sym stores the result in
            # self.jmprel, which __resolve_plt then iterates.
            return []
        self.__parsed_reloc_tables.add(section.header["sh_offset"])

        relocs = []
        for r in section.iter_relocations():
            # MIPS64 is just plain old f****d up
            # https://www.sourceware.org/ml/libc-alpha/2003-03/msg00153.html
            if self.arch == "MIPS64":
                # Little endian additionally needs one of its fields reversed... WHY
                if self.elf.little_endian:
                    r.entry.r_info_sym = r.entry.r_info & 0xFFFFFFFF
                    r.entry.r_info = struct.unpack(">Q", struct.pack("<Q",
                            r.entry.r_info))[0]

                # One r_info packs up to three relocation types, one per byte.
                packed_types = (
                    r.entry.r_info & 0xFF,
                    r.entry.r_info >> 8 & 0xFF,
                    r.entry.r_info >> 16 & 0xFF,
                )
                extra_sym = r.entry.r_info >> 24 & 0xFF
                if extra_sym != 0:
                    die("r_info_extra_sym is nonzero??? PLEASE SEND HELP")

                sym = self.dynsym.get_symbol(r.entry.r_info_sym)

                for reloc_type in packed_types:
                    if reloc_type == 0:
                        continue
                    r.entry.r_info_type = reloc_type
                    reloc = self._make_reloc(r, sym)
                    if reloc is not None:
                        relocs.append(reloc)
                        self.__save_symbol(reloc, reloc.symbol.entry.st_value)
            else:
                if "sh_link" in section.header:
                    # The ELF reader of this class is self.elf; the
                    # self.reader used here before is never set in this class.
                    symtab = self.elf.get_section(section.header["sh_link"])
                    sym = symtab.get_symbol(r.entry.r_info_sym)
                else:
                    sym = self.dynsym.get_symbol(r.entry.r_info_sym)

                reloc = self._make_reloc(r, sym)
                if reloc is not None:
                    relocs.append(reloc)
                    self.__save_symbol(reloc, reloc.symbol.entry.st_value)
        return relocs


    def _make_reloc(self, reloc_sec, symbol):
        """Build the relocation object for one entry, or None for an unsupported type.

        The class comes from ``get_relocation`` for this architecture and
        the entry's ``r_info_type``. The addend is passed only when the
        entry is RELA, otherwise None.
        """
        entry = reloc_sec.entry
        if reloc_sec.is_RELA():
            addend = entry.r_addend
        else:
            addend = None

        reloc_cls = get_relocation(self.arch, entry.r_info_type)
        if reloc_cls is None:
            return None
        return reloc_cls(self, symbol, reloc_sec.entry.r_offset, addend)


    def load_static_sym(self):
        """Record the ``.symtab`` symbols whose value is an address of this binary.

        Unnamed symbols and symbols with value 0 are skipped, as are the
        ``$a`` / ``$t`` / ``$d`` symbols on ARM. Function symbols are also
        flagged in ``self.db.functions``. Does nothing without ``.symtab``.
        """
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return

        arm_markers = (b"$a", b"$t", b"$d")
        skip_markers = self.arch == "ARM"

        for sym in symtab.iter_symbols():
            if skip_markers and sym.name in arm_markers:
                continue

            addr = sym.entry.st_value
            if addr == 0 or sym.name == b"":
                continue

            label = sym.name.decode()
            if not self.is_address(addr):
                continue

            if label in self.symbols:
                label = self.rename_sym(label)

            self.reverse_symbols[addr] = label
            self.symbols[label] = addr

            if self.is_function(sym):
                self.db.functions[addr] = None


    def __section_is_data(self, s):
        """Truthy when the section has SHF_WRITE or SHF_ALLOC set and is not executable."""
        wanted = s.header.sh_flags & (SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC)
        if not wanted:
            # Same falsy value the original `and` expression produced.
            return wanted
        return not self.__section_is_exec(s)


    def __section_is_exec(self, s):
        """Return the SHF_EXECINSTR bit of the section flags (non-zero when set)."""
        flags = s.header.sh_flags
        return flags & SH_FLAGS.SHF_EXECINSTR


    def __segment_is_data(self, s):
        """Truthy when the segment has PF_W or PF_R set and is not executable."""
        wanted = s.header.p_flags & (P_FLAGS.PF_W | P_FLAGS.PF_R)
        if not wanted:
            # Same falsy value the original `and` expression produced.
            return wanted
        return not self.__segment_is_exec(s)


    def __segment_is_exec(self, s):
        """Return the PF_X bit of the segment flags (non-zero when set)."""
        flags = s.header.p_flags
        return flags & P_FLAGS.PF_X


    def is_function(self, sy):
        """Return True when the symbol's type is STT_FUNC."""
        sym_type = sy.entry.st_info.type
        return sym_type == "STT_FUNC"


    def set_arch_name(self):
        """Set ``self.arch`` to the machine name; MIPS gets a "32"/"64" suffix by ELF class."""
        machine = self.elf.get_machine_arch()
        if machine == "MIPS":
            # Any other ELF class leaves the bare "MIPS" name.
            machine += {32: "32", 64: "64"}.get(self.elf.elfclass, "")
        self.arch = machine


    def is_big_endian(self):
        """Return True when the ELF reader does not report little endian."""
        little = self.elf.little_endian
        return not little


    def get_entry_point(self):
        """Return the ``e_entry`` field of the ELF header."""
        header = self.elf.header
        return header['e_entry']
Ejemplo n.º 49
0
class ReadElf(object):
    """ display_* methods are used to emit output into the output stream
    """
    def __init__(self, file, output):
        """ Wrap an ELF stream for dumping.

            file:
                stream object with the ELF file to read

            output:
                output stream to write to
        """
        # DWARF info is loaded lazily, only once a debug dump is requested
        self._dwarfinfo = None

        self.output = output
        self.elffile = ELFFile(file)

    def display_file_header(self):
        """ Emit the ELF file header: the raw e_ident bytes as a hex "Magic"
            line, followed by one labelled line per header field.
        """
        emit, emitline = self._emit, self._emitline

        emitline('ELF Header:')
        emit('  Magic:   ')
        magic_bytes = ('%2.2x' % byte2int(b) for b in self.elffile.e_ident_raw)
        emitline(' '.join(magic_bytes))

        hdr = self.elffile.header
        ident = hdr['e_ident']

        # Identification fields
        emitline('  Class:                             %s' %
                describe_ei_class(ident['EI_CLASS']))
        emitline('  Data:                              %s' %
                describe_ei_data(ident['EI_DATA']))
        emitline('  Version:                           %s' %
                describe_ei_version(ident['EI_VERSION']))
        emitline('  OS/ABI:                            %s' %
                describe_ei_osabi(ident['EI_OSABI']))
        emitline('  ABI Version:                       %d' %
                ident['EI_ABIVERSION'])

        # General header fields
        emitline('  Type:                              %s' %
                describe_e_type(hdr['e_type']))
        emitline('  Machine:                           %s' %
                describe_e_machine(hdr['e_machine']))
        emitline('  Version:                           %s' %
                describe_e_version_numeric(hdr['e_version']))
        emitline('  Entry point address:               %s' %
                self._format_hex(hdr['e_entry']))

        # Table offsets are printed in decimal, with a unit suffix
        emit('  Start of program headers:          %s' %
                hdr['e_phoff'])
        emitline(' (bytes into file)')
        emit('  Start of section headers:          %s' %
                hdr['e_shoff'])
        emitline(' (bytes into file)')

        emitline('  Flags:                             %s' %
                self._format_hex(hdr['e_flags']))

        # Sizes and counts
        emitline('  Size of this header:               %s (bytes)' %
                hdr['e_ehsize'])
        emitline('  Size of program headers:           %s (bytes)' %
                hdr['e_phentsize'])
        emitline('  Number of program headers:         %s' %
                hdr['e_phnum'])
        emitline('  Size of section headers:           %s (bytes)' %
                hdr['e_shentsize'])
        emitline('  Number of section headers:         %s' %
                hdr['e_shnum'])
        emitline('  Section header string table index: %s' %
                hdr['e_shstrndx'])

    def display_program_headers(self, show_heading=True):
        """ Display the ELF program headers, followed by the mapping of
            sections to segments.

            show_heading:
                If True, displays the heading for this information
                (Elf file type is..., entry point, number of program headers)

            A blank line is always emitted first. When the file has no
            segments, a notice is emitted and nothing else is displayed.
            The section-to-segment mapping is skipped when the file has no
            sections.
        """
        self._emitline()
        if self.elffile.num_segments() == 0:
            self._emitline('There are no program headers in this file.')
            return

        elfheader = self.elffile.header
        if show_heading:
            self._emitline('Elf file type is %s' %
                describe_e_type(elfheader['e_type']))
            self._emitline('Entry point is %s' %
                self._format_hex(elfheader['e_entry']))
            # readelf weirdness - why isn't e_phoff printed as hex? (for
            # section headers, it is...)
            self._emitline('There are %s program headers, starting at offset %s' % (
                elfheader['e_phnum'], elfheader['e_phoff']))
            self._emitline()

        self._emitline('Program Headers:')

        # Now comes the table of program headers with their attributes. Note
        # that due to different formatting constraints of 32-bit and 64-bit
        # addresses, there are some conditions on elfclass here.
        #
        # First comes the table heading
        #
        if self.elffile.elfclass == 32:
            self._emitline('  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align')
        else:
            self._emitline('  Type           Offset             VirtAddr           PhysAddr')
            self._emitline('                 FileSiz            MemSiz              Flags  Align')

        # Now the entries
        #
        for segment in self.elffile.iter_segments():
            # The segment type is emitted without a newline; the remaining
            # columns follow on the same line.
            self._emit('  %-14s ' % describe_p_type(segment['p_type']))

            if self.elffile.elfclass == 32:
                # 32-bit: every column fits on a single line
                self._emitline('%s %s %s %s %s %-3s %s' % (
                    self._format_hex(segment['p_offset'], fieldsize=6),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True),
                    self._format_hex(segment['p_filesz'], fieldsize=5),
                    self._format_hex(segment['p_memsz'], fieldsize=5),
                    describe_p_flags(segment['p_flags']),
                    self._format_hex(segment['p_align'])))
            else: # 64
                # 64-bit: the columns are split over two lines, matching the
                # two-line table heading above
                self._emitline('%s %s %s' % (
                    self._format_hex(segment['p_offset'], fullhex=True),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True)))
                self._emitline('                 %s %s  %-3s    %s' % (
                    self._format_hex(segment['p_filesz'], fullhex=True),
                    self._format_hex(segment['p_memsz'], fullhex=True),
                    describe_p_flags(segment['p_flags']),
                    # lead0x set to False for p_align, to mimic readelf.
                    # No idea why the difference from 32-bit mode :-|
                    self._format_hex(segment['p_align'], lead0x=False)))

            # Interpreter segments get an extra line naming the interpreter
            if isinstance(segment, InterpSegment):
                self._emitline('      [Requesting program interpreter: %s]' %
                    bytes2str(segment.get_interp_name()))

        # Sections to segments mapping
        #
        if self.elffile.num_sections() == 0:
            # No sections? We're done
            return

        self._emitline('\n Section to Segment mapping:')
        self._emitline('  Segment Sections...')

        for nseg, segment in enumerate(self.elffile.iter_segments()):
            self._emit('   %2.2d     ' % nseg)

            # List the names of all non-null sections contained in this
            # segment, separated by spaces
            for section in self.elffile.iter_sections():
                if (    not section.is_null() and
                        segment.section_in_segment(section)):
                    self._emit('%s ' % bytes2str(section.name))

            self._emitline('')

    def display_section_headers(self, show_heading=True):
        """ Display the ELF section headers, followed by a legend for the
            flag letters.

            show_heading:
                If True, first emit a line with the number of section
                headers and the (hexadecimal) offset at which they start
        """
        elfheader = self.elffile.header
        if show_heading:
            self._emitline('There are %s section headers, starting at offset %s' % (
                elfheader['e_shnum'], self._format_hex(elfheader['e_shoff'])))

        # The title is pluralized only when there is more than one header
        self._emitline('\nSection Header%s:' % (
            's' if elfheader['e_shnum'] > 1 else ''))

        # Different formatting constraints of 32-bit and 64-bit addresses
        #
        if self.elffile.elfclass == 32:
            self._emitline('  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al')
        else:
            self._emitline('  [Nr] Name              Type             Address           Offset')
            self._emitline('       Size              EntSize          Flags  Link  Info  Align')

        # Now the entries
        #
        for nsec, section in enumerate(self.elffile.iter_sections()):
            # Index, name (cut to 17 chars) and type (cut to 15 chars) are
            # emitted without a newline; the other columns follow.
            self._emit('  [%2u] %-17.17s %-15.15s ' % (
                nsec, bytes2str(section.name), describe_sh_type(section['sh_type'])))

            if self.elffile.elfclass == 32:
                # 32-bit: the remaining columns fit on the same line
                self._emitline('%s %s %s %s %3s %2s %3s %2s' % (
                    self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False),
                    self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_size'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_entsize'], fieldsize=2, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))
            else: # 64
                # 64-bit: address and offset finish the first line. The offset
                # field is widened to 16 digits only when the value does not
                # fit in 32 bits.
                self._emitline(' %s  %s' % (
                    self._format_hex(section['sh_addr'], fullhex=True, lead0x=False),
                    self._format_hex(section['sh_offset'],
                        fieldsize=16 if section['sh_offset'] > 0xffffffff else 8,
                        lead0x=False)))
                # ... and the remaining columns go on a second line
                self._emitline('       %s  %s %3s      %2s   %3s     %s' % (
                    self._format_hex(section['sh_size'], fullhex=True, lead0x=False),
                    self._format_hex(section['sh_entsize'], fullhex=True, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))

        # Legend for the flag letters; the "l (large)" entry is added only
        # for the two machine types listed below
        self._emitline('Key to Flags:')
        self._emit('  W (write), A (alloc), X (execute), M (merge), S (strings)')
        if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'):
            self._emitline(', l (large)')
        else:
            self._emitline()
        self._emitline('  I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)')
        self._emitline('  O (extra OS processing required) o (OS specific), p (processor specific)')

    def display_symbol_tables(self):
        """ Display the symbol tables contained in the file
        """
        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            if section['sh_entsize'] == 0:
                self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % (
                    bytes2str(section.name)))
                continue

            self._emitline("\nSymbol table '%s' contains %s entries:" % (
                bytes2str(section.name), section.num_symbols()))

            if self.elffile.elfclass == 32:
                self._emitline('   Num:    Value  Size Type    Bind   Vis      Ndx Name')
            else: # 64
                self._emitline('   Num:    Value          Size Type    Bind   Vis      Ndx Name')

            for nsym, symbol in enumerate(section.iter_symbols()):
                # symbol names are truncated to 25 chars, similarly to readelf
                self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s' % (
                    nsym,
                    self._format_hex(symbol['st_value'], fullhex=True, lead0x=False),
                    symbol['st_size'],
                    describe_symbol_type(symbol['st_info']['type']),
                    describe_symbol_bind(symbol['st_info']['bind']),
                    describe_symbol_visibility(symbol['st_other']['visibility']),
                    describe_symbol_shndx(symbol['st_shndx']),
                    bytes2str(symbol.name)))

    def display_relocations(self):
        """ Display the relocations contained in the file.

            For every relocation section a title and a column heading are
            emitted, followed by one line per relocation. When the file has
            no relocation sections at all, a notice is emitted instead.
        """
        has_relocation_sections = False
        for section in self.elffile.iter_sections():
            if not isinstance(section, RelocationSection):
                continue

            has_relocation_sections = True
            self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
                bytes2str(section.name),
                self._format_hex(section['sh_offset']),
                section.num_relocations()))
            # RELA sections carry an explicit addend, hence the extra column
            if section.is_RELA():
                self._emitline("  Offset          Info           Type           Sym. Value    Sym. Name + Addend")
            else:
                self._emitline(" Offset     Info    Type            Sym.Value  Sym. Name")

            # The symbol table section pointed to in sh_link
            symtable = self.elffile.get_section(section['sh_link'])

            for rel in section.iter_relocations():
                # Offset and info are printed with 8 hex digits for 32-bit
                # files and 12 otherwise
                hexwidth = 8 if self.elffile.elfclass == 32 else 12
                self._emit('%s  %s %-17.17s' % (
                    self._format_hex(rel['r_offset'],
                        fieldsize=hexwidth, lead0x=False),
                    self._format_hex(rel['r_info'],
                        fieldsize=hexwidth, lead0x=False),
                    describe_reloc_type(
                        rel['r_info_type'], self.elffile)))

                # Symbol index 0: no symbol columns are printed for this
                # relocation
                if rel['r_info_sym'] == 0:
                    self._emitline()
                    continue

                symbol = symtable.get_symbol(rel['r_info_sym'])
                # Some symbols have zero 'st_name', so instead what's used is
                # the name of the section they point at
                if symbol['st_name'] == 0:
                    symsec = self.elffile.get_section(symbol['st_shndx'])
                    symbol_name = symsec.name
                else:
                    symbol_name = symbol.name
                # Symbol value and name (the name is cut to 22 characters)
                self._emit(' %s %s%22.22s' % (
                    self._format_hex(
                        symbol['st_value'],
                        fullhex=True, lead0x=False),
                    '  ' if self.elffile.elfclass == 32 else '',
                    bytes2str(symbol_name)))
                if section.is_RELA():
                    # The addend is shown as a sign followed by its absolute
                    # value in hex
                    self._emit(' %s %x' % (
                        '+' if rel['r_addend'] >= 0 else '-',
                        abs(rel['r_addend'])))
                self._emitline()

        if not has_relocation_sections:
            self._emitline('\nThere are no relocations in this file.')

    def display_hex_dump(self, section_spec):
        """ Display a hex dump of a section. section_spec is either a section
            number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nHex dump of section '%s':" % bytes2str(section.name))
        self._note_relocs_for_section(section)
        addr = section['sh_addr']
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            bytesleft = len(data) - dataptr
            # chunks of 16 bytes per line
            linebytes = 16 if bytesleft > 16 else bytesleft

            self._emit('  %s ' % self._format_hex(addr, fieldsize=8))
            for i in range(16):
                if i < linebytes:
                    self._emit('%2.2x' % byte2int(data[dataptr + i]))
                else:
                    self._emit('  ')
                if i % 4 == 3:
                    self._emit(' ')

            for i in range(linebytes):
                c = data[dataptr + i : dataptr + i + 1]
                if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f:
                    self._emit(bytes2str(c))
                else:
                    self._emit(bytes2str(b'.'))

            self._emitline()
            addr += linebytes
            dataptr += linebytes

        self._emitline()

    def display_string_dump(self, section_spec):
        """ Display a strings dump of a section. section_spec is either a
            section number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nString dump of section '%s':" % bytes2str(section.name))

        found = False
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            while ( dataptr < len(data) and
                    not (32 <= byte2int(data[dataptr]) <= 127)):
                dataptr += 1

            if dataptr >= len(data):
                break

            endptr = dataptr
            while endptr < len(data) and byte2int(data[endptr]) != 0:
                endptr += 1

            found = True
            self._emitline('  [%6x]  %s' % (
                dataptr, bytes2str(data[dataptr:endptr])))

            dataptr = endptr

        if not found:
            self._emitline('  No strings found in this section.')
        else:
            self._emitline()

    def display_debug_dump(self, dump_what):
        """ Dump a DWARF section
        """
        self._init_dwarfinfo()
        if self._dwarfinfo is None:
            return

        set_global_machine_arch(self.elffile.get_machine_arch())

        if dump_what == 'info':
            self._dump_debug_info()
        elif dump_what == 'decodedline':
            self._dump_debug_line_programs()
        elif dump_what == 'frames':
            self._dump_debug_frames()
        elif dump_what == 'frames-interp':
            self._dump_debug_frames_interp()
        else:
            self._emitline('debug dump not yet supported for "%s"' % dump_what)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into. For example with fieldsize=8, the format will
                be %08x
                If None, the minimal required field size will be used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass

            lead0x:
                If True, leading 0x is added
        """
        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
            if num < self.elffile.num_sections():
                return self.elffile.get_section(num)
            else:
                return None
        except ValueError:
            # Not a number. Must be a name then
            return self.elffile.get_section_by_name(str2bytes(spec))

    def _note_relocs_for_section(self, section):
        """ If there are relocation sections pointing to the givne section,
            emit a note about it.
        """
        for relsec in self.elffile.iter_sections():
            if isinstance(relsec, RelocationSection):
                info_idx = relsec['sh_info']
                if self.elffile.get_section(info_idx) == section:
                    self._emitline('  Note: This section has relocations against it, but these have NOT been applied to this dump.')
                    return

    def _init_dwarfinfo(self):
        """ Initialize the DWARF info contained in the file and assign it to
            self._dwarfinfo.
            Leave self._dwarfinfo at None if no DWARF info was found in the file
        """
        if self._dwarfinfo is not None:
            return

        if self.elffile.has_dwarf_info():
            self._dwarfinfo = self.elffile.get_dwarf_info()
        else:
            self._dwarfinfo = None

    def _dump_debug_info(self):
        """ Dump the debugging info section.
        """
        self._emitline('Contents of the .debug_info section:\n')

        # Offset of the .debug_info section in the stream
        section_offset = self._dwarfinfo.debug_info_sec.global_offset

        for cu in self._dwarfinfo.iter_CUs():
            self._emitline('  Compilation Unit @ offset %s:' %
                self._format_hex(cu.cu_offset))
            self._emitline('   Length:        %s (%s)' % (
                self._format_hex(cu['unit_length']),
                '%s-bit' % cu.dwarf_format()))
            self._emitline('   Version:       %s' % cu['version']),
            self._emitline('   Abbrev Offset: %s' % cu['debug_abbrev_offset']),
            self._emitline('   Pointer Size:  %s' % cu['address_size'])

            # The nesting depth of each DIE within the tree of DIEs must be
            # displayed. To implement this, a counter is incremented each time
            # the current DIE has children, and decremented when a null die is
            # encountered. Due to the way the DIE tree is serialized, this will
            # correctly reflect the nesting depth
            #
            die_depth = 0
            for die in cu.iter_DIEs():
                if die.is_null():
                    die_depth -= 1
                    continue
                self._emitline(' <%s><%x>: Abbrev Number: %s (%s)' % (
                    die_depth,
                    die.offset,
                    die.abbrev_code,
                    die.tag))

                for attr in itervalues(die.attributes):
                    name = attr.name
                    # Unknown attribute values are passed-through as integers
                    if isinstance(name, int):
                        name = 'Unknown AT value: %x' % name
                    self._emitline('    <%2x>   %-18s: %s' % (
                        attr.offset,
                        name,
                        describe_attr_value(
                            attr, die, section_offset)))

                if die.has_children:
                    die_depth += 1

        self._emitline()

    def _dump_debug_line_programs(self):
        """ Dump the (decoded) line programs from .debug_line
            The programs are dumped in the order of the CUs they belong to.

            For every CU a "CU: <file>:" title and a column heading are
            emitted, followed by one line (file name, line number, address)
            per line program entry that carries a state.
        """
        self._emitline('Decoded dump of debug contents of section .debug_line:\n')

        for cu in self._dwarfinfo.iter_CUs():
            lineprogram = self._dwarfinfo.line_program_for_CU(cu)

            # The CU is named after the first file entry, prefixed by the
            # first include directory when there is one
            cu_filename = ''
            if len(lineprogram['include_directory']) > 0:
                cu_filename = '%s/%s' % (
                    bytes2str(lineprogram['include_directory'][0]),
                    bytes2str(lineprogram['file_entry'][0].name))
            else:
                cu_filename = bytes2str(lineprogram['file_entry'][0].name)

            self._emitline('CU: %s:' % cu_filename)
            self._emitline('File name                            Line number    Starting address')

            # Print each state's file, line and address information. For some
            # instructions other output is needed to be compatible with
            # readelf.
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is None:
                    # Special handling for commands that don't set a new state
                    if entry.command == DW_LNS_set_file:
                        # args[0] is used as a 1-based index into file_entry
                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
                        if file_entry.dir_index == 0:
                            # current directory
                            self._emitline('\n./%s:[++]' % (
                                bytes2str(file_entry.name)))
                        else:
                            # dir_index is used as a 1-based index into
                            # include_directory
                            self._emitline('\n%s/%s:' % (
                                bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]),
                                bytes2str(file_entry.name)))
                    elif entry.command == DW_LNE_define_file:
                        self._emitline('%s:' % (
                            bytes2str(lineprogram['include_directory'][entry.args[0].dir_index])))
                elif not state.end_sequence:
                    # readelf doesn't print the state after end_sequence
                    # instructions. I think it's a bug but to be compatible
                    # I don't print them too.
                    # An address of 0 is printed as a bare '0', without the
                    # 0x prefix.
                    self._emitline('%-35s  %11d  %18s' % (
                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
                        state.line,
                        '0' if state.address == 0 else
                               self._format_hex(state.address)))
                if entry.command == DW_LNS_copy:
                    # Another readelf oddity...
                    self._emitline()

    def _dump_debug_frames(self):
        """ Dump the raw frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return
        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE' % (
                    entry.offset, entry['length'], entry['CIE_id']))
                self._emitline('  Version:               %d' % entry['version'])
                self._emitline('  Augmentation:          "%s"' % bytes2str(entry['augmentation']))
                self._emitline('  Code alignment factor: %u' % entry['code_alignment_factor'])
                self._emitline('  Data alignment factor: %d' % entry['data_alignment_factor'])
                self._emitline('  Return address column: %d' % entry['return_address_register'])
                self._emitline()
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))

            self._emit(describe_CFI_instructions(entry))
        self._emitline()

    def _dump_debug_frames_interp(self):
        """ Dump the interpreted (decoded) frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return

        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_id'],
                    bytes2str(entry['augmentation']),
                    entry['code_alignment_factor'],
                    entry['data_alignment_factor'],
                    entry['return_address_register']))
                ra_regnum = entry['return_address_register']
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))
                ra_regnum = entry.cie['return_address_register']

            # Print the heading row for the decoded table
            self._emit('   LOC')
            self._emit('  ' if entry.structs.address_size == 4 else '          ')
            self._emit(' CFA      ')

            # Decode the table nad look at the registers it describes.
            # We build reg_order here to match readelf's order. In particular,
            # registers are sorted by their number, and the register matching
            # ra_regnum is always listed last with a special heading.
            decoded_table = entry.get_decoded()
            reg_order = sorted(ifilter(
                lambda r: r != ra_regnum,
                decoded_table.reg_order))

            # Headings for the registers
            for regnum in reg_order:
                self._emit('%-6s' % describe_reg_name(regnum))
            self._emitline('ra      ')

            # Now include ra_regnum in reg_order to print its values similarly
            # to the other registers.
            reg_order.append(ra_regnum)
            for line in decoded_table.table:
                self._emit(self._format_hex(
                    line['pc'], fullhex=True, lead0x=False))
                self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa']))

                for regnum in reg_order:
                    if regnum in line:
                        s = describe_CFI_register_rule(line[regnum])
                    else:
                        s = 'u'
                    self._emit('%-6s' % s)
                self._emitline()
        self._emitline()

    def _emit(self, s=''):
        """ Emit an object to output
        """
        self.output.write(str(s))

    def _emitline(self, s=''):
        """ Emit an object to output, followed by a newline
        """
        self.output.write(str(s) + '\n')
Ejemplo n.º 50
0
class ReadElf(object):
    """ display_* methods are used to emit output into the output stream
    """
    def __init__(self, file, output):
        """ Set up a reader over an ELF stream.

            file:
                stream object with the ELF file to read; it is handed to
                ELFFile for parsing

            output:
                output stream the display_* methods write to
        """
        self.elffile = ELFFile(file)
        self.output = output

        # Both caches start out empty. The DWARF info is filled in when a
        # debug dump is requested, the version info when it is first needed.
        self._dwarfinfo = self._versioninfo = None

    def display_file_header(self):
        """ Display the ELF file header: a title line, the raw
            identification bytes in hex, then one labelled line for each
            e_ident and header field.
        """
        emitline = self._emitline
        hexfmt = self._format_hex

        emitline('ELF Header:')
        self._emit('  Magic:   ')
        ident_bytes = ['%2.2x' % byte2int(b)
                       for b in self.elffile.e_ident_raw]
        emitline(' '.join(ident_bytes))

        header = self.elffile.header
        ident = header['e_ident']

        # Fields decoded from the e_ident identification block
        ei_class = describe_ei_class(ident['EI_CLASS'])
        emitline('  Class:                             %s' % ei_class)
        ei_data = describe_ei_data(ident['EI_DATA'])
        emitline('  Data:                              %s' % ei_data)
        ei_version = describe_ei_version(ident['EI_VERSION'])
        emitline('  Version:                           %s' % ei_version)
        ei_osabi = describe_ei_osabi(ident['EI_OSABI'])
        emitline('  OS/ABI:                            %s' % ei_osabi)
        emitline('  ABI Version:                       %d' %
                 ident['EI_ABIVERSION'])

        # Fields of the header proper
        file_type = describe_e_type(header['e_type'])
        emitline('  Type:                              %s' % file_type)
        machine = describe_e_machine(header['e_machine'])
        emitline('  Machine:                           %s' % machine)
        version = describe_e_version_numeric(header['e_version'])
        emitline('  Version:                           %s' % version)
        entry = hexfmt(header['e_entry'])
        emitline('  Entry point address:               %s' % entry)

        # The two table offsets are printed in decimal, followed by a
        # fixed suffix
        self._emit('  Start of program headers:          %s' %
                   header['e_phoff'])
        emitline(' (bytes into file)')
        self._emit('  Start of section headers:          %s' %
                   header['e_shoff'])
        emitline(' (bytes into file)')

        flags = header['e_flags']
        emitline('  Flags:                             %s%s' % (
            hexfmt(flags), self.decode_flags(flags)))

        emitline('  Size of this header:               %s (bytes)' %
                 header['e_ehsize'])
        emitline('  Size of program headers:           %s (bytes)' %
                 header['e_phentsize'])
        emitline('  Number of program headers:         %s' %
                 header['e_phnum'])
        emitline('  Size of section headers:           %s (bytes)' %
                 header['e_shentsize'])
        emitline('  Number of section headers:         %s' %
                 header['e_shnum'])
        emitline('  Section header string table index: %s' %
                 header['e_shstrndx'])

    def decode_flags(self, flags):
        description = ""
        if self.elffile['e_machine'] == "EM_ARM":
            if flags & E_FLAGS.EF_ARM_HASENTRY:
                description += ", has entry point"

            version = flags & E_FLAGS.EF_ARM_EABIMASK
            if version == E_FLAGS.EF_ARM_EABI_VER5:
                description += ", Version5 EABI"
        return description

    def display_program_headers(self, show_heading=True):
        """ Display the ELF program headers.
            If show_heading is True, displays the heading for this information
            (Elf file type is...)
        """
        self._emitline()
        if self.elffile.num_segments() == 0:
            self._emitline('There are no program headers in this file.')
            return

        elfheader = self.elffile.header
        if show_heading:
            self._emitline('Elf file type is %s' %
                describe_e_type(elfheader['e_type']))
            self._emitline('Entry point is %s' %
                self._format_hex(elfheader['e_entry']))
            # readelf weirness - why isn't e_phoff printed as hex? (for section
            # headers, it is...)
            self._emitline('There are %s program headers, starting at offset %s' % (
                elfheader['e_phnum'], elfheader['e_phoff']))
            self._emitline()

        self._emitline('Program Headers:')

        # Now comes the table of program headers with their attributes. Note
        # that due to different formatting constraints of 32-bit and 64-bit
        # addresses, there are some conditions on elfclass here.
        #
        # First comes the table heading
        #
        if self.elffile.elfclass == 32:
            self._emitline('  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align')
        else:
            self._emitline('  Type           Offset             VirtAddr           PhysAddr')
            self._emitline('                 FileSiz            MemSiz              Flags  Align')

        # Now the entries
        #
        for segment in self.elffile.iter_segments():
            self._emit('  %-14s ' % describe_p_type(segment['p_type']))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %s %-3s %s' % (
                    self._format_hex(segment['p_offset'], fieldsize=6),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True),
                    self._format_hex(segment['p_filesz'], fieldsize=5),
                    self._format_hex(segment['p_memsz'], fieldsize=5),
                    describe_p_flags(segment['p_flags']),
                    self._format_hex(segment['p_align'])))
            else: # 64
                self._emitline('%s %s %s' % (
                    self._format_hex(segment['p_offset'], fullhex=True),
                    self._format_hex(segment['p_vaddr'], fullhex=True),
                    self._format_hex(segment['p_paddr'], fullhex=True)))
                self._emitline('                 %s %s  %-3s    %s' % (
                    self._format_hex(segment['p_filesz'], fullhex=True),
                    self._format_hex(segment['p_memsz'], fullhex=True),
                    describe_p_flags(segment['p_flags']),
                    # lead0x set to False for p_align, to mimic readelf.
                    # No idea why the difference from 32-bit mode :-|
                    self._format_hex(segment['p_align'], lead0x=False)))

            if isinstance(segment, InterpSegment):
                self._emitline('      [Requesting program interpreter: %s]' %
                    bytes2str(segment.get_interp_name()))

        # Sections to segments mapping
        #
        if self.elffile.num_sections() == 0:
            # No sections? We're done
            return

        self._emitline('\n Section to Segment mapping:')
        self._emitline('  Segment Sections...')

        for nseg, segment in enumerate(self.elffile.iter_segments()):
            self._emit('   %2.2d     ' % nseg)

            for section in self.elffile.iter_sections():
                if (    not section.is_null() and
                        segment.section_in_segment(section)):
                    self._emit('%s ' % bytes2str(section.name))

            self._emitline('')

    def display_section_headers(self, show_heading=True):
        """ Display the ELF section headers
        """
        elfheader = self.elffile.header
        if show_heading:
            self._emitline('There are %s section headers, starting at offset %s' % (
                elfheader['e_shnum'], self._format_hex(elfheader['e_shoff'])))

        self._emitline('\nSection Header%s:' % (
            's' if elfheader['e_shnum'] > 1 else ''))

        # Different formatting constraints of 32-bit and 64-bit addresses
        #
        if self.elffile.elfclass == 32:
            self._emitline('  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al')
        else:
            self._emitline('  [Nr] Name              Type             Address           Offset')
            self._emitline('       Size              EntSize          Flags  Link  Info  Align')

        # Now the entries
        #
        for nsec, section in enumerate(self.elffile.iter_sections()):
            self._emit('  [%2u] %-17.17s %-15.15s ' % (
                nsec, bytes2str(section.name), describe_sh_type(section['sh_type'])))

            if self.elffile.elfclass == 32:
                self._emitline('%s %s %s %s %3s %2s %3s %2s' % (
                    self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False),
                    self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_size'], fieldsize=6, lead0x=False),
                    self._format_hex(section['sh_entsize'], fieldsize=2, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))
            else: # 64
                self._emitline(' %s  %s' % (
                    self._format_hex(section['sh_addr'], fullhex=True, lead0x=False),
                    self._format_hex(section['sh_offset'],
                        fieldsize=16 if section['sh_offset'] > 0xffffffff else 8,
                        lead0x=False)))
                self._emitline('       %s  %s %3s      %2s   %3s     %s' % (
                    self._format_hex(section['sh_size'], fullhex=True, lead0x=False),
                    self._format_hex(section['sh_entsize'], fullhex=True, lead0x=False),
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))

        self._emitline('Key to Flags:')
        self._emit('  W (write), A (alloc), X (execute), M (merge), S (strings)')
        if self.elffile['e_machine'] in ('EM_X86_64', 'EM_L10M'):
            self._emitline(', l (large)')
        else:
            self._emitline()
        self._emitline('  I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)')
        self._emitline('  O (extra OS processing required) o (OS specific), p (processor specific)')

    def display_symbol_tables(self):
        """ Display the symbol tables contained in the file
        """
        self._init_versioninfo()

        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            if section['sh_entsize'] == 0:
                self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % (
                    bytes2str(section.name)))
                continue

            self._emitline("\nSymbol table '%s' contains %s entries:" % (
                bytes2str(section.name), section.num_symbols()))

            if self.elffile.elfclass == 32:
                self._emitline('   Num:    Value  Size Type    Bind   Vis      Ndx Name')
            else: # 64
                self._emitline('   Num:    Value          Size Type    Bind   Vis      Ndx Name')

            for nsym, symbol in enumerate(section.iter_symbols()):

                version_info = ''
                # readelf doesn't display version info for Solaris versioning
                if (section['sh_type'] == 'SHT_DYNSYM' and
                        self._versioninfo['type'] == 'GNU'):
                    version = self._symbol_version(nsym)
                    if (version['name'] != bytes2str(symbol.name) and
                        version['index'] not in ('VER_NDX_LOCAL',
                                                 'VER_NDX_GLOBAL')):
                        if version['filename']:
                            # external symbol
                            version_info = '@%(name)s (%(index)i)' % version
                        else:
                            # internal symbol
                            if version['hidden']:
                                version_info = '@%(name)s' % version
                            else:
                                version_info = '@@%(name)s' % version

                # symbol names are truncated to 25 chars, similarly to readelf
                self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s%s' % (
                    nsym,
                    self._format_hex(
                        symbol['st_value'], fullhex=True, lead0x=False),
                    symbol['st_size'],
                    describe_symbol_type(symbol['st_info']['type']),
                    describe_symbol_bind(symbol['st_info']['bind']),
                    describe_symbol_visibility(symbol['st_other']['visibility']),
                    describe_symbol_shndx(symbol['st_shndx']),
                    bytes2str(symbol.name),
                    version_info))

    def display_dynamic_tags(self):
        """ Display the dynamic tags contained in the file
        """
        has_dynamic_sections = False
        for section in self.elffile.iter_sections():
            if not isinstance(section, DynamicSection):
                continue

            has_dynamic_sections = True
            self._emitline("\nDynamic section at offset %s contains %s entries:" % (
                self._format_hex(section['sh_offset']),
                section.num_tags()))
            self._emitline("  Tag        Type                         Name/Value")

            padding = 20 + (8 if self.elffile.elfclass == 32 else 0)
            for tag in section.iter_tags():
                if tag.entry.d_tag == 'DT_NEEDED':
                    parsed = 'Shared library: [%s]' % bytes2str(tag.needed)
                elif tag.entry.d_tag == 'DT_RPATH':
                    parsed = 'Library rpath: [%s]' % bytes2str(tag.rpath)
                elif tag.entry.d_tag == 'DT_RUNPATH':
                    parsed = 'Library runpath: [%s]' % bytes2str(tag.runpath)
                elif tag.entry.d_tag == 'DT_SONAME':
                    parsed = 'Library soname: [%s]' % bytes2str(tag.soname)
                elif (tag.entry.d_tag.endswith('SZ') or
                      tag.entry.d_tag.endswith('ENT')):
                    parsed = '%i (bytes)' % tag['d_val']
                elif (tag.entry.d_tag.endswith('NUM') or
                      tag.entry.d_tag.endswith('COUNT')):
                    parsed = '%i' % tag['d_val']
                elif tag.entry.d_tag == 'DT_PLTREL':
                    s = describe_dyn_tag(tag.entry.d_val)
                    if s.startswith('DT_'):
                        s = s[3:]
                    parsed = '%s' % s
                else:
                    parsed = '%#x' % tag['d_val']

                self._emitline(" %s %-*s %s" % (
                    self._format_hex(ENUM_D_TAG.get(tag.entry.d_tag, tag.entry.d_tag),
                        fullhex=True, lead0x=True),
                    padding,
                    '(%s)' % (tag.entry.d_tag[3:],),
                    parsed))
        if not has_dynamic_sections:
            # readelf only prints this if there is at least one segment
            if self.elffile.num_segments():
                self._emitline("\nThere is no dynamic section in this file.")

    def display_relocations(self):
        """ Display the relocations contained in the file
        """
        has_relocation_sections = False
        for section in self.elffile.iter_sections():
            if not isinstance(section, RelocationSection):
                continue

            has_relocation_sections = True
            self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
                bytes2str(section.name),
                self._format_hex(section['sh_offset']),
                section.num_relocations()))
            if section.is_RELA():
                self._emitline("  Offset          Info           Type           Sym. Value    Sym. Name + Addend")
            else:
                self._emitline(" Offset     Info    Type            Sym.Value  Sym. Name")

            # The symbol table section pointed to in sh_link
            symtable = self.elffile.get_section(section['sh_link'])

            for rel in section.iter_relocations():
                hexwidth = 8 if self.elffile.elfclass == 32 else 12
                self._emit('%s  %s %-17.17s' % (
                    self._format_hex(rel['r_offset'],
                        fieldsize=hexwidth, lead0x=False),
                    self._format_hex(rel['r_info'],
                        fieldsize=hexwidth, lead0x=False),
                    describe_reloc_type(
                        rel['r_info_type'], self.elffile)))

                if rel['r_info_sym'] == 0:
                    self._emitline()
                    continue

                symbol = symtable.get_symbol(rel['r_info_sym'])
                # Some symbols have zero 'st_name', so instead what's used is
                # the name of the section they point at
                if symbol['st_name'] == 0:
                    symsec = self.elffile.get_section(symbol['st_shndx'])
                    symbol_name = symsec.name
                else:
                    symbol_name = symbol.name
                self._emit(' %s %s%22.22s' % (
                    self._format_hex(
                        symbol['st_value'],
                        fullhex=True, lead0x=False),
                    '  ' if self.elffile.elfclass == 32 else '',
                    bytes2str(symbol_name)))
                if section.is_RELA():
                    self._emit(' %s %x' % (
                        '+' if rel['r_addend'] >= 0 else '-',
                        abs(rel['r_addend'])))
                self._emitline()

        if not has_relocation_sections:
            self._emitline('\nThere are no relocations in this file.')

    def display_version_info(self):
        """ Display the version info contained in the file
        """
        self._init_versioninfo()

        if not self._versioninfo['type']:
            self._emitline("\nNo version information found in this file.")
            return

        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._print_version_section_header(
                    section, 'Version symbols', lead0x=False)

                num_symbols = section.num_symbols()
    
                # Symbol version info are printed four by four entries 
                for idx_by_4 in range(0, num_symbols, 4):

                    self._emit('  %03x:' % idx_by_4)

                    for idx in range(idx_by_4, min(idx_by_4 + 4, num_symbols)):

                        symbol_version = self._symbol_version(idx)
                        if symbol_version['index'] == 'VER_NDX_LOCAL':
                            version_index = 0
                            version_name = '(*local*)'
                        elif symbol_version['index'] == 'VER_NDX_GLOBAL':
                            version_index = 1
                            version_name = '(*global*)'
                        else:
                            version_index = symbol_version['index']
                            version_name = '(%(name)s)' % symbol_version

                        visibility = 'h' if symbol_version['hidden'] else ' '

                        self._emit('%4x%s%-13s' % (
                            version_index, visibility, version_name))

                    self._emitline()

            elif isinstance(section, GNUVerDefSection):
                self._print_version_section_header(
                    section, 'Version definition', indent=2)

                offset = 0
                for verdef, verdaux_iter in section.iter_versions():
                    verdaux = next(verdaux_iter)

                    name = verdaux.name
                    if verdef['vd_flags']:
                        flags = describe_ver_flags(verdef['vd_flags'])
                        # Mimic exactly the readelf output
                        flags += ' '
                    else:
                        flags = 'none'

                    self._emitline('  %s: Rev: %i  Flags: %s  Index: %i'
                                   '  Cnt: %i  Name: %s' % (
                            self._format_hex(offset, fieldsize=6,
                                             alternate=True),
                            verdef['vd_version'], flags, verdef['vd_ndx'],
                            verdef['vd_cnt'], bytes2str(name)))

                    verdaux_offset = (
                            offset + verdef['vd_aux'] + verdaux['vda_next'])
                    for idx, verdaux in enumerate(verdaux_iter, start=1):
                        self._emitline('  %s: Parent %i: %s' %
                            (self._format_hex(verdaux_offset, fieldsize=4),
                                              idx, bytes2str(verdaux.name)))
                        verdaux_offset += verdaux['vda_next']

                    offset += verdef['vd_next']

            elif isinstance(section, GNUVerNeedSection):
                self._print_version_section_header(section, 'Version needs')

                offset = 0
                for verneed, verneed_iter in section.iter_versions():

                    self._emitline('  %s: Version: %i  File: %s  Cnt: %i' % (
                            self._format_hex(offset, fieldsize=6,
                                             alternate=True),
                            verneed['vn_version'], bytes2str(verneed.name),
                            verneed['vn_cnt']))

                    vernaux_offset = offset + verneed['vn_aux']
                    for idx, vernaux in enumerate(verneed_iter, start=1):
                        if vernaux['vna_flags']:
                            flags = describe_ver_flags(vernaux['vna_flags'])
                            # Mimic exactly the readelf output
                            flags += ' '
                        else:
                            flags = 'none'

                        self._emitline(
                            '  %s:   Name: %s  Flags: %s  Version: %i' % (
                                self._format_hex(vernaux_offset, fieldsize=4),
                                bytes2str(vernaux.name), flags,
                                vernaux['vna_other']))

                        vernaux_offset += vernaux['vna_next']

                    offset += verneed['vn_next']

    def display_hex_dump(self, section_spec):
        """ Display a hex dump of a section. section_spec is either a section
            number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nHex dump of section '%s':" % bytes2str(section.name))
        self._note_relocs_for_section(section)
        addr = section['sh_addr']
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            bytesleft = len(data) - dataptr
            # chunks of 16 bytes per line
            linebytes = 16 if bytesleft > 16 else bytesleft

            self._emit('  %s ' % self._format_hex(addr, fieldsize=8))
            for i in range(16):
                if i < linebytes:
                    self._emit('%2.2x' % byte2int(data[dataptr + i]))
                else:
                    self._emit('  ')
                if i % 4 == 3:
                    self._emit(' ')

            for i in range(linebytes):
                c = data[dataptr + i : dataptr + i + 1]
                if byte2int(c[0]) >= 32 and byte2int(c[0]) < 0x7f:
                    self._emit(bytes2str(c))
                else:
                    self._emit(bytes2str(b'.'))

            self._emitline()
            addr += linebytes
            dataptr += linebytes

        self._emitline()

    def display_string_dump(self, section_spec):
        """ Display a strings dump of a section. section_spec is either a
            section number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nString dump of section '%s':" % bytes2str(section.name))

        found = False
        data = section.data()
        dataptr = 0

        while dataptr < len(data):
            while ( dataptr < len(data) and
                    not (32 <= byte2int(data[dataptr]) <= 127)):
                dataptr += 1

            if dataptr >= len(data):
                break

            endptr = dataptr
            while endptr < len(data) and byte2int(data[endptr]) != 0:
                endptr += 1

            found = True
            self._emitline('  [%6x]  %s' % (
                dataptr, bytes2str(data[dataptr:endptr])))

            dataptr = endptr

        if not found:
            self._emitline('  No strings found in this section.')
        else:
            self._emitline()

    def display_debug_dump(self, dump_what):
        """ Dump a DWARF section
        """
        self._init_dwarfinfo()
        if self._dwarfinfo is None:
            return

        set_global_machine_arch(self.elffile.get_machine_arch())

        if dump_what == 'info':
            self._dump_debug_info()
        elif dump_what == 'decodedline':
            self._dump_debug_line_programs()
        elif dump_what == 'frames':
            self._dump_debug_frames()
        elif dump_what == 'frames-interp':
            self._dump_debug_frames_interp()
        else:
            self._emitline('debug dump not yet supported for "%s"' % dump_what)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True,
                    alternate=False):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into. For example with fieldsize=8, the format will
                be %08x
                If None, the minimal required field size will be used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass

            lead0x:
                If True, leading 0x is added

            alternate:
                If True, override lead0x to emulate the alternate
                hexadecimal form specified in format string with the #
                character: only non-zero values are prefixed with 0x.
                This form is used by readelf.
        """
        if alternate:
            if addr == 0:
                lead0x = False
            else:
                lead0x = True
                fieldsize -= 2

        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _print_version_section_header(self, version_section, name, lead0x=True,
                                      indent=1):
        """ Print the two-line header of a version related section (versym,
            verneed or verdef).

            name is the title used in the first line. lead0x and indent
            accommodate the small differences readelf has between these
            headers: whether the address gets a 0x prefix, and how many
            spaces precede the second line.
        """
        # Sections exposing num_versions() are counted in versions, the
        # others in symbols
        if hasattr(version_section, 'num_versions'):
            count_entries = version_section.num_versions
        else:
            count_entries = version_section.num_symbols
        num_entries = count_entries()

        self._emitline("\n%s section '%s' contains %s entries:" %
            (name, bytes2str(version_section.name), num_entries))

        address = self._format_hex(
            version_section['sh_addr'], fieldsize=16, lead0x=lead0x)
        file_offset = self._format_hex(
            version_section['sh_offset'], fieldsize=6, lead0x=True)
        link = version_section['sh_link']
        # Name of the section sh_link refers to
        linked_name = bytes2str(
            self.elffile.get_section(version_section['sh_link']).name)
        self._emitline('%sAddr: %s  Offset: %s  Link: %i (%s)' % (
            ' ' * indent, address, file_offset, link, linked_name))

    def _init_versioninfo(self):
        """ Search and initialize informations about version related sections
            and the kind of versioning used (GNU or Solaris).
        """
        if self._versioninfo is not None:
            return

        self._versioninfo = {'versym': None, 'verdef': None,
                             'verneed': None, 'type': None}

        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._versioninfo['versym'] = section
            elif isinstance(section, GNUVerDefSection):
                self._versioninfo['verdef'] = section
            elif isinstance(section, GNUVerNeedSection):
                self._versioninfo['verneed'] = section
            elif isinstance(section, DynamicSection):
                for tag in section.iter_tags():
                    if tag['d_tag'] == 'DT_VERSYM':
                        self._versioninfo['type'] = 'GNU'
                        break

        if not self._versioninfo['type'] and (
                self._versioninfo['verneed'] or self._versioninfo['verdef']):
            self._versioninfo['type'] = 'Solaris'

    def _symbol_version(self, nsym):
        """ Return a dict containing information on the
                   or None if no version information is available
        """
        self._init_versioninfo()

        symbol_version = dict.fromkeys(('index', 'name', 'filename', 'hidden'))

        if (not self._versioninfo['versym'] or
                nsym >= self._versioninfo['versym'].num_symbols()):
            return None

        symbol = self._versioninfo['versym'].get_symbol(nsym)
        index = symbol.entry['ndx']
        if not index in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'):
            index = int(index)

            if self._versioninfo['type'] == 'GNU':
                # In GNU versioning mode, the highest bit is used to
                # store wether the symbol is hidden or not
                if index & 0x8000:
                    index &= ~0x8000
                    symbol_version['hidden'] = True

            if (self._versioninfo['verdef'] and
                    index <= self._versioninfo['verdef'].num_versions()):
                _, verdaux_iter = \
                        self._versioninfo['verdef'].get_version(index)
                symbol_version['name'] = bytes2str(next(verdaux_iter).name)
            else:
                verneed, vernaux = \
                        self._versioninfo['verneed'].get_version(index)
                symbol_version['name'] = bytes2str(vernaux.name)
                symbol_version['filename'] = bytes2str(verneed.name)

        symbol_version['index'] = index
        return symbol_version

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
            if num < self.elffile.num_sections():
                return self.elffile.get_section(num)
            else:
                return None
        except ValueError:
            # Not a number. Must be a name then
            return self.elffile.get_section_by_name(str2bytes(spec))

    def _note_relocs_for_section(self, section):
        """ If there are relocation sections pointing to the givne section,
            emit a note about it.
        """
        for relsec in self.elffile.iter_sections():
            if isinstance(relsec, RelocationSection):
                info_idx = relsec['sh_info']
                if self.elffile.get_section(info_idx) == section:
                    self._emitline('  Note: This section has relocations against it, but these have NOT been applied to this dump.')
                    return

    def _init_dwarfinfo(self):
        """ Initialize the DWARF info contained in the file and assign it to
            self._dwarfinfo.
            Leave self._dwarfinfo at None if no DWARF info was found in the file
        """
        if self._dwarfinfo is not None:
            return

        if self.elffile.has_dwarf_info():
            self._dwarfinfo = self.elffile.get_dwarf_info()
        else:
            self._dwarfinfo = None

    def _dump_debug_info(self):
        """ Dump the debugging info section.
        """
        self._emitline('Contents of the .debug_info section:\n')

        # Offset of the .debug_info section in the stream
        section_offset = self._dwarfinfo.debug_info_sec.global_offset

        for cu in self._dwarfinfo.iter_CUs():
            self._emitline('  Compilation Unit @ offset %s:' %
                self._format_hex(cu.cu_offset))
            self._emitline('   Length:        %s (%s)' % (
                self._format_hex(cu['unit_length']),
                '%s-bit' % cu.dwarf_format()))
            self._emitline('   Version:       %s' % cu['version']),
            self._emitline('   Abbrev Offset: %s' % (
                self._format_hex(cu['debug_abbrev_offset']))),
            self._emitline('   Pointer Size:  %s' % cu['address_size'])

            # The nesting depth of each DIE within the tree of DIEs must be
            # displayed. To implement this, a counter is incremented each time
            # the current DIE has children, and decremented when a null die is
            # encountered. Due to the way the DIE tree is serialized, this will
            # correctly reflect the nesting depth
            #
            die_depth = 0
            for die in cu.iter_DIEs():
                self._emitline(' <%s><%x>: Abbrev Number: %s%s' % (
                    die_depth,
                    die.offset,
                    die.abbrev_code,
                    (' (%s)' % die.tag) if not die.is_null() else ''))
                if die.is_null():
                    die_depth -= 1
                    continue

                for attr in itervalues(die.attributes):
                    name = attr.name
                    # Unknown attribute values are passed-through as integers
                    if isinstance(name, int):
                        name = 'Unknown AT value: %x' % name
                    self._emitline('    <%2x>   %-18s: %s' % (
                        attr.offset,
                        name,
                        describe_attr_value(
                            attr, die, section_offset)))

                if die.has_children:
                    die_depth += 1

        self._emitline()

    def _dump_debug_line_programs(self):
        """ Dump the (decoded) line programs from .debug_line
            The programs are dumped in the order of the CUs they belong to.
        """
        self._emitline('Decoded dump of debug contents of section .debug_line:\n')

        for cu in self._dwarfinfo.iter_CUs():
            lineprogram = self._dwarfinfo.line_program_for_CU(cu)

            cu_filename = bytes2str(lineprogram['file_entry'][0].name)
            if len(lineprogram['include_directory']) > 0:
                dir_index = lineprogram['file_entry'][0].dir_index
                if dir_index > 0:
                    dir = lineprogram['include_directory'][dir_index - 1]
                else:
                    dir = b'.'
                cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)

            self._emitline('CU: %s:' % cu_filename)
            self._emitline('File name                            Line number    Starting address')

            # Print each state's file, line and address information. For some
            # instructions other output is needed to be compatible with
            # readelf.
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is None:
                    # Special handling for commands that don't set a new state
                    if entry.command == DW_LNS_set_file:
                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
                        if file_entry.dir_index == 0:
                            # current directory
                            self._emitline('\n./%s:[++]' % (
                                bytes2str(file_entry.name)))
                        else:
                            self._emitline('\n%s/%s:' % (
                                bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]),
                                bytes2str(file_entry.name)))
                    elif entry.command == DW_LNE_define_file:
                        self._emitline('%s:' % (
                            bytes2str(lineprogram['include_directory'][entry.args[0].dir_index])))
                elif not state.end_sequence:
                    # readelf doesn't print the state after end_sequence
                    # instructions. I think it's a bug but to be compatible
                    # I don't print them too.
                    self._emitline('%-35s  %11d  %18s' % (
                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
                        state.line,
                        '0' if state.address == 0 else
                               self._format_hex(state.address)))
                if entry.command == DW_LNS_copy:
                    # Another readelf oddity...
                    self._emitline()

    def _dump_debug_frames(self):
        """ Dump the raw frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return
        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE' % (
                    entry.offset, entry['length'], entry['CIE_id']))
                self._emitline('  Version:               %d' % entry['version'])
                self._emitline('  Augmentation:          "%s"' % bytes2str(entry['augmentation']))
                self._emitline('  Code alignment factor: %u' % entry['code_alignment_factor'])
                self._emitline('  Data alignment factor: %d' % entry['data_alignment_factor'])
                self._emitline('  Return address column: %d' % entry['return_address_register'])
                self._emitline()
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))

            self._emit(describe_CFI_instructions(entry))
        self._emitline()

    def _dump_debug_frames_interp(self):
        """ Dump the interpreted (decoded) frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return

        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_id'],
                    bytes2str(entry['augmentation']),
                    entry['code_alignment_factor'],
                    entry['data_alignment_factor'],
                    entry['return_address_register']))
                ra_regnum = entry['return_address_register']
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))
                ra_regnum = entry.cie['return_address_register']

            # Print the heading row for the decoded table
            self._emit('   LOC')
            self._emit('  ' if entry.structs.address_size == 4 else '          ')
            self._emit(' CFA      ')

            # Decode the table nad look at the registers it describes.
            # We build reg_order here to match readelf's order. In particular,
            # registers are sorted by their number, and the register matching
            # ra_regnum is always listed last with a special heading.
            decoded_table = entry.get_decoded()
            reg_order = sorted(ifilter(
                lambda r: r != ra_regnum,
                decoded_table.reg_order))

            # Headings for the registers
            for regnum in reg_order:
                self._emit('%-6s' % describe_reg_name(regnum))
            self._emitline('ra      ')

            # Now include ra_regnum in reg_order to print its values similarly
            # to the other registers.
            reg_order.append(ra_regnum)
            for line in decoded_table.table:
                self._emit(self._format_hex(
                    line['pc'], fullhex=True, lead0x=False))
                self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa']))

                for regnum in reg_order:
                    if regnum in line:
                        s = describe_CFI_register_rule(line[regnum])
                    else:
                        s = 'u'
                    self._emit('%-6s' % s)
                self._emitline()
        self._emitline()

    def _emit(self, s=''):
        """ Emit an object to output
        """
        self.output.write(str(s))

    def _emitline(self, s=''):
        """ Emit an object to output, followed by a newline
        """
        self.output.write(str(s) + '\n')
Ejemplo n.º 51
0
class Image(object):
    """ An ELF image plus lookup tables derived from its DWARF info.

        When the file carries DWARF info, the constructor builds:
          _compile_units         CU name -> dict with "line_program", "lpc",
                                 "hpc", "comp_dir", "functions", "variables"
                                 and "lines" (line number -> [addresses])
          _addresses             address -> "<CU name>+<line>"
          _lowest_known_address  lowest "lpc" over all CUs (None if no CU)
          _cfa_rule              pc -> decoded CFI table row (only when the
                                 file has CFI)
    """
    def __init__(self, fname):
        """ Load the ELF file at fname and index its DWARF info, if any.
        """
        # ELF files are binary, so read them in binary mode on every
        # platform (text mode corrupts the data where it translates line
        # endings). The whole file is buffered in memory, so no file handle
        # stays open behind the ELFFile object.
        with open(fname, "rb") as f:
            elf_data = StringIO(f.read())

        self.elf = ELFFile(elf_data)
        if self.elf.has_dwarf_info():
            self.dwarf = self.elf.get_dwarf_info()
            set_global_machine_arch(self.elf.get_machine_arch())
            self.__tame_dwarf()
            self.get_expr_evaluator = lambda: ExprLiveEval(self)

    @property
    def executable(self):
        """ The (base_addr, image_bytes) pair from _build_executable(),
            computed on first access and cached in self._exe.
        """
        try:
            return self._exe
        except AttributeError:
            # Not built yet
            self._exe = self._build_executable()
        return self._exe

    def _build_executable(self):
        """ Concatenate the data of section 1 and, when they qualify, of
            sections 2 and 3 into one image.

            A section qualifies if flag bit 2 is set in sh_flags and its type
            is SHT_PROGBITS. Section 1 must qualify. Section 3 is only looked
            at when section 2 qualified.
            Returns (base_addr, img) where base_addr is section 1's sh_addr.
            Raises Exception if a qualifying section does not start right
            after the data gathered so far.
        """
        s = self.elf.get_section(1)
        assert s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS"
        base_addr = s.header["sh_addr"]

        img = s.data()

        s = self.elf.get_section(2)
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #2 should follow #1")

            img += s.data()

            s = self.elf.get_section(3)
            print("%s" % str(s.header))
            if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
                if s.header["sh_addr"] != base_addr + len(img):
                    raise Exception("bad section vaddr - #3 should follow #2")

                img += s.data()

        return (base_addr, img)

    @staticmethod
    def _location_of(die, location_lists):
        """ Best-effort DW_AT_location of a DIE: the referenced location
            list for offset-like forms, the raw attribute value otherwise,
            and [] if the location cannot be obtained.
        """
        try:
            attr = die.attributes['DW_AT_location']
            if attr.form in ['DW_FORM_sec_offset', 'DW_FORM_data4']:
                return location_lists.get_location_list_at_offset(attr.value)
            return attr.value
        except Exception:
            # Deliberately lenient: a missing or unreadable location must
            # not abort the indexing of the whole file.
            return []

    def __tame_dwarf(self):
        """ Walk the DWARF info once and fill the lookup tables described in
            the class docstring. Progress and diagnostics are printed.
        """
        dw = self.dwarf
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None

        location_lists = dw.location_lists()

        # Prefer EH CFI over plain CFI when both are available
        cfi = None
        if dw.has_EH_CFI():
            cfi = dw.EH_CFI_entries()
            print("we have EH CFI entries")
        elif dw.has_CFI():
            cfi = dw.CFI_entries()
            print("we have CFI entries")
        else:
            print("no (EH) CFI")

        if cfi is not None:
            self._cfa_rule = {}
            for c in cfi:
                try:
                    decoded = c.get_decoded()
                except Exception:
                    # Give up on the remaining entries, keep what we have
                    print("CFI decoding exception")
                    break

                for entry in decoded.table:
                    if entry["pc"] in self._cfa_rule:
                        print("duplicate cfa rule found at pc %x" % entry["pc"])
                        print("\t%s" % str(self._cfa_rule[entry["pc"]]))
                        print("\t%s" % str(entry))
                        print("")
                    # On duplicates the rule seen last wins
                    self._cfa_rule[entry["pc"]] = entry

        for c in dw.iter_CUs():
            functions = {}
            variables = {}

            td = c.get_top_DIE()

            for d in td.iter_children():
                if d.tag == 'DW_TAG_subprogram':
                    if 'DW_AT_declaration' in d.attributes:
                        continue
                    lpc = d.attributes['DW_AT_low_pc'].value
                    hpc = d.attributes['DW_AT_high_pc'].value
                    # NOTE(review): a high_pc below low_pc is taken to be an
                    # offset from low_pc rather than an address - confirm.
                    if hpc < lpc:
                        hpc += lpc

                    function_name = d.attributes['DW_AT_name'].value
                    f = {}
                    f["lpc"] = lpc
                    f["hpc"] = hpc
                    f["args"] = {}
                    f["vars"] = {}
                    if 'DW_AT_frame_base' in d.attributes:
                        a = d.attributes['DW_AT_frame_base']
                        if a.form == 'DW_FORM_data4' or a.form == 'DW_FORM_sec_offset':
                            f["fb"] = location_lists.get_location_list_at_offset(a.value)
                        else:
                            f["fb"] = a.value

                    for child in d.iter_children():
                        if child.tag == "DW_TAG_formal_parameter":
                            name = child.attributes['DW_AT_name'].value
                            f["args"][name] = {
                                "location": self._location_of(child, location_lists)}
                        if child.tag == "DW_TAG_variable":
                            name = child.attributes['DW_AT_name'].value
                            f["vars"][name] = {
                                "location": self._location_of(child, location_lists)}

                    functions[function_name] = f
                elif d.tag == 'DW_TAG_variable':
                    # Only variables whose DW_AT_decl_file is 1 are recorded
                    if d.attributes['DW_AT_decl_file'].value == 1:
                        try:
                            name = d.attributes['DW_AT_name'].value
                        except Exception:
                            name = '(%s)' % str(d.attributes['DW_AT_name'])

                        v = {}
                        try:
                            v["location"] = d.attributes['DW_AT_location'].value
                        except Exception:
                            v["location"] = []
                        variables[name] = v

            x = {}

            cu_name = td.attributes['DW_AT_name'].value
            x["line_program"] = dw.line_program_for_CU(c).get_entries()
            x["lpc"] = td.attributes['DW_AT_low_pc'].value
            x["hpc"] = td.attributes['DW_AT_high_pc'].value
            x["comp_dir"] = td.attributes['DW_AT_comp_dir'].value
            x["functions"] = functions
            x["variables"] = variables

            self._compile_units[cu_name] = x
            if ((self._lowest_known_address is None) or
                    (self._lowest_known_address > x["lpc"])):
                self._lowest_known_address = x["lpc"]

        # Build both directions of the address <-> line mapping
        for c in self._compile_units:
            lines = self._compile_units[c]["lines"] = {}
            for line in self._compile_units[c]["line_program"]:
                state = line.state
                if state is not None and not (state.end_sequence or state.basic_block or state.epilogue_begin or state.prologue_end):
                    cl = "%s+%d" % (c, state.line)
                    if state.address in self._addresses and self._addresses[state.address] != cl:
                        raise Exception("addr %x is both \"%s\" and \"%s+%d\"" % (state.address, self._addresses[state.address], c, state.line))
                    self._addresses[state.address] = cl
                    lines.setdefault(state.line, []).append(state.address)

        if cfi is not None:
            print("CFA table:")
            for pc in sorted(self._cfa_rule):
                print("%x: %s\t\t(%s)" % (pc, str(self._cfa_rule[pc]), self.addr2line(pc)))

    def addr2line(self, addr):
        """ Return "<CU name>+<line>" for an exactly known address, or ''
            when the address is unknown (or no address table exists).
        """
        try:
            return self._addresses[addr]
        except (AttributeError, KeyError, TypeError):
            return ''

    def loc_at(self, addr):
        """ Locate addr, stepping back 4 bytes at a time until a known
            address is found or the lowest known address is passed.

            Returns (function_name, cu_name, line, comp_dir); line is the
            line number as a string and function_name is "" when no function
            range contains the matched address. Returns
            ("unknown", "", 0, "") if nothing matches.
        """
        lowest = getattr(self, "_lowest_known_address", None)
        line = self.addr2line(addr)
        # Without a lowest known address there is nothing to walk back to;
        # the explicit None check keeps the search from never terminating.
        while '' == line and lowest is not None and addr >= lowest:
            addr -= 4
            line = self.addr2line(addr)
        if '' == line:
            return ("unknown", "", 0, "")

        cuname, culine = line.split("+")
        fname = ""
        c = self._compile_units[cuname]
        for f in c["functions"]:
            if ((c["functions"][f]["lpc"] <= addr) and
                    (c["functions"][f]["hpc"] >= addr)):
                fname = f
                break
        return (fname, cuname, culine, c["comp_dir"])

    def line2addr(self, fname, line):
        """ Return the list of addresses recorded for a line of compilation
            unit fname. Raises KeyError for an unknown unit or line.
        """
        return self._compile_units[fname]["lines"][line]
Ejemplo n.º 52
0
class ELF:
    """ ELF front-end: reads sections and symbols from an ELF file and
        records them in the `classbinary` and `mem` objects handed to the
        constructor.
    """
    def __init__(self, mem, classbinary, filename):
        """ Open the ELF file and register every named section.

            mem:         receives symbol type information via mem.add()
            classbinary: receives sections, symbols and imports
            filename:    path of the ELF file
        """
        import capstone as CAPSTONE

        # The handle is not closed here: section_stream_read() later seeks
        # and reads through the sections' stream, so it presumably has to
        # stay open for the lifetime of this object.
        fd = open(filename, "rb")
        self.elf = ELFFile(fd)
        self.classbinary = classbinary
        self.mem = mem

        # Machine arch name -> capstone architecture
        self.arch_lookup = {
            "x86": CAPSTONE.CS_ARCH_X86,
            "x64": CAPSTONE.CS_ARCH_X86,
            "ARM": CAPSTONE.CS_ARCH_ARM,
            "MIPS": CAPSTONE.CS_ARCH_MIPS,
        }

        # Machine arch name -> capstone mode. A dict value means the mode
        # additionally depends on the ELF class (word size).
        self.arch_mode_lookup = {
            "x86": CAPSTONE.CS_MODE_32,
            "x64": CAPSTONE.CS_MODE_64,
            # This table holds modes, so use the mode constant rather than
            # the architecture constant.
            "ARM": CAPSTONE.CS_MODE_ARM,
            "MIPS": {
                32: CAPSTONE.CS_MODE_MIPS32,
                64: CAPSTONE.CS_MODE_MIPS64,
            }
        }

        self.sym_type_lookup = {
            "STT_FUNC": MEM_FUNC,
        }

        self.__sections = {} # start address -> elf section

        for s in self.elf.iter_sections():
            if not s.name:
                continue

            start = s.header.sh_addr

            # Only sections with one of the low four flag bits set are
            # added to the sorted list of section starts
            if s.header.sh_flags & 0xf != 0:
                bisect.insort_left(classbinary._sorted_sections, start)

            self.__sections[start] = s
            is_data = self.__section_is_data(s)
            is_exec = self.__section_is_exec(s)
            data = s.data()

            classbinary._abs_sections[start] = SectionAbs(
                    s.name.decode(),
                    start,
                    s.header.sh_size,
                    len(data),
                    is_exec,
                    is_data,
                    data)


    def load_section_names(self):
        """ Record name -> address in classbinary.section_names for every
            section with one of the low four flag bits set.
        """
        # Used for the auto-completion
        for s in self.elf.iter_sections():
            if s.header.sh_flags & 0xf != 0:
                ad = s.header.sh_addr
                name = s.name.decode()
                self.classbinary.section_names[name] = ad


    def load_static_sym(self):
        """ Load the symbols of .symtab (if present) into classbinary and
            register their types in mem. Symbols with a zero value or an
            empty name are skipped.
        """
        symtab = self.elf.get_section_by_name(b".symtab")
        if symtab is None:
            return
        # These names are not saved when the file is ARM
        dont_save = [b"$a", b"$t", b"$d"]
        arch = self.elf.get_machine_arch()
        is_arm = arch == "ARM"

        for sy in symtab.iter_symbols():
            if is_arm and sy.name in dont_save:
                continue

            ad = sy.entry.st_value
            if ad != 0 and sy.name != b"":
                name = sy.name.decode()
                if name in self.classbinary.symbols:
                    name = self.classbinary.rename_sym(name)

                self.classbinary.reverse_symbols[ad] = name
                self.classbinary.symbols[name] = ad

                ty = self.sym_type_lookup.get(sy.entry.st_info.type, MEM_UNK)
                self.mem.add(ad, 1, ty)


    def __x86_resolve_reloc(self, rel, symtab, plt, got_plt, addr_size):
        """ Name the x86/x64 PLT entries targeted by one relocation section.

            rel:       relocation section to walk
            symtab:    symbol table the relocations refer to
            plt:       the .plt section
            got_plt:   the .got.plt section
            addr_size: size in bytes of one .got.plt slot (4 or 8)

            Each resolved entry is recorded as "<symbol>@plt" in
            classbinary (imports, symbols, reverse_symbols) and in mem.
        """
        # Save all got offsets with the corresponding symbol
        got_off = {}
        for r in rel.iter_relocations():
            sym = symtab.get_symbol(r.entry.r_info_sym)
            name = sym.name.decode()
            ad = r.entry.r_offset
            if name and ad:
                ty = self.sym_type_lookup.get(sym.entry.st_info.type, MEM_UNK)
                got_off[ad] = [name + "@plt", ty]

        data = got_plt.data()

        # One struct item per slot; integer division keeps the count exact
        unpack_str = "<" if self.elf.little_endian else ">"
        unpack_str += str(len(data) // addr_size)
        unpack_str += "Q" if addr_size == 8 else "I"

        got_values = struct.unpack(unpack_str, data)
        plt_data = plt.data()
        wrong_jump_opcode = False
        got_base = got_plt.header.sh_addr

        # Read the .got.plt and for each address in the plt, subtract 6
        # to go to the beginning of the plt entry.

        opcode_jmp = [b"\xff\x25", b"\xff\xa3"]

        for slot, jump_in_plt in enumerate(got_values):
            # Derive the slot address from its index, so that skipping a
            # slot can never desynchronise the address from the value read.
            off = got_base + slot * addr_size
            if off not in got_off:
                continue

            plt_start = jump_in_plt - 6
            plt_off = plt_start - plt.header.sh_addr

            # Check "jmp *(ADDR)" opcode.
            if plt_data[plt_off:plt_off+2] not in opcode_jmp:
                wrong_jump_opcode = True
                continue

            name, ty = got_off[off]
            if name in self.classbinary.symbols:
                continue

            self.classbinary.imports[plt_start] = True
            self.classbinary.reverse_symbols[plt_start] = name
            self.classbinary.symbols[name] = plt_start

            self.mem.add(plt_start, 1, ty)

        if wrong_jump_opcode:
            warning("I'm expecting to see a jmp *(ADDR) on each plt entry")
            warning("opcode \\xff\\x25 was not found, please report")


    def __resolve_symtab(self, rel, symtab, arch):
        """ Record every symbol of symtab that has a non-zero value as an
            import (used for ARM and MIPS). On ARM the name gets an "@plt"
            suffix. `rel` is currently unused.
        """
        # TODO: don't know why st_value is not 0 like x86
        # In some executables I've tested, it seems that st_value
        # is the address of the plt entry

        # TODO: really useful to iter on relocations and get the symbol
        # from the symtab ?
        # for r in rel.iter_relocations():
            # sym = symtab.get_symbol(r.entry.r_info_sym)

        for sym in symtab.iter_symbols():
            ad = sym.entry.st_value
            if ad != 0:
                name = sym.name.decode()
                if arch == "ARM":
                    name += "@plt"

                if name in self.classbinary.symbols:
                    continue

                self.classbinary.imports[ad] = True
                self.classbinary.reverse_symbols[ad] = name
                self.classbinary.symbols[name] = ad

                ty = self.sym_type_lookup.get(sym.entry.st_info.type, MEM_UNK)
                self.mem.add(ad, 1, ty)


    def __iter_reloc(self):
        """ Yield (relocation_section, linked_symbol_table) for every
            SHT_RELA / SHT_REL section whose sh_link resolves to a section.
        """
        for rel in self.elf.iter_sections():
            if rel.header.sh_type in ["SHT_RELA", "SHT_REL"]:
                symtab = self.elf.get_section(rel.header.sh_link)
                if symtab is None:
                    continue
                yield (rel, symtab)


    def load_dyn_sym(self):
        """ Load the dynamic (imported) symbols. ARM and MIPS take them
            from the symbol tables linked to the relocation sections;
            x86/x64 resolve them through .plt and .got.plt.
        """
        arch = self.elf.get_machine_arch()

        if arch == "ARM" or arch == "MIPS":
            for (rel, symtab) in self.__iter_reloc():
                self.__resolve_symtab(rel, symtab, arch)
            return

        # x86/x64

        # TODO: .plt can be renamed ?
        plt = self.elf.get_section_by_name(b".plt")

        if plt is None:
            warning(".plt section not found")
            return

        # TODO: .got.plt can be renamed or may be removed ?
        got_plt = self.elf.get_section_by_name(b".got.plt")
        addr_size = 8 if arch == "x64" else 4

        if got_plt is None:
            warning(".got.plt section not found")
            return

        for (rel, symtab) in self.__iter_reloc():
            self.__x86_resolve_reloc(rel, symtab, plt, got_plt, addr_size)


    def __section_is_data(self, s):
        """ Truthy if the section has SHF_WRITE or SHF_ALLOC set and is not
            executable.
        """
        mask = SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
        return s.header.sh_flags & mask and not self.__section_is_exec(s)


    def __section_is_exec(self, s):
        """ Non-zero if the section has SHF_EXECINSTR set; 0 for None.
        """
        if s is None:
            return 0
        return s.header.sh_flags & SH_FLAGS.SHF_EXECINSTR


    def section_stream_read(self, addr, size):
        """ Read up to `size` bytes at virtual address `addr` straight from
            the file, never reading past the end of the containing section.
            Returns b"" if no section contains addr.
        """
        s = self.classbinary.get_section(addr)
        if s is None:
            return b""
        s = self.__sections[s.start]
        off = addr - s.header.sh_addr
        end = s.header.sh_addr + s.header.sh_size
        s.stream.seek(s.header.sh_offset + off)
        return s.stream.read(min(size, end - addr))


    def get_arch(self):
        """ Return the capstone (arch, mode) pair for this file, or
            (None, None) if the machine architecture is not supported.
            The endianness flag is OR-ed into the mode.
        """
        import capstone as CAPSTONE
        arch = self.arch_lookup.get(self.elf.get_machine_arch(), None)
        mode = self.arch_mode_lookup.get(self.elf.get_machine_arch(), None)

        if arch is None:
            return None, None

        # If one arch name has multiple "word size"
        if isinstance(mode, dict):
            mode = mode[self.elf.elfclass]

        if self.elf.little_endian:
            mode |= CAPSTONE.CS_MODE_LITTLE_ENDIAN
        else:
            mode |= CAPSTONE.CS_MODE_BIG_ENDIAN

        return arch, mode


    def get_arch_string(self):
        """ Return the machine architecture name reported by the ELF file.
        """
        return self.elf.get_machine_arch()


    def get_entry_point(self):
        """ Return the e_entry field of the ELF header.
        """
        return self.elf.header['e_entry']
Ejemplo n.º 53
0
    def exe(self, filename, argv=[], envp=[], stdin='stdin', stdout='stdout', stderr='stderr'):
        '''
        Loads an ELF program in memory and prepares the initial CPU state.
        Maps every PT_LOAD segment and zeroes the memory between the end of
        the file-backed data (bss start) and the end of the mapped image.
        @param filename: pathname of the file to be executed.
        @param argv: list of parameters for the program to execute.
        @param envp: list of environment variables for the program to execute.
        @param stdin: name used to open the standard input File.
        @param stdout: name used to open the standard output File.
        @param stderr: name used to open the standard error File.
        @raise error:
            - 'Not matching cpu': if the program is compiled for a different architecture
            - 'Not matching memory': if the program is compiled for a different address size
        @todo: define va_randomize and read_implies_exec personality
        '''
        # NOTE: argv and envp are only read here, never mutated, so the
        # mutable defaults are harmless; they are kept for interface
        # compatibility.

        #Set standard file descriptors
        self.files = [ File(stdin,'rb'), File(stdout,'wb'), File(stderr,'wb')]

        #load elf See binfmt_elf.c
        #read the ELF object file
        elf = ELFFile(file(filename))
        arch = {'x86':'i386','x64':'amd64'}[elf.get_machine_arch()]
        addressbitsize = {'x86':32, 'x64':64}[elf.get_machine_arch()]
        logger.info("Loading %s as a %s elf"%(filename,arch))
        logger.info("\tArguments: %s"%repr(argv))
        logger.debug("\tEnvironmen:")
        for e in envp:
            logger.debug("\t\t%s"%repr(e))

        assert self.cpu.machine == arch, "Not matching cpu"
        assert self.mem.addressbitsize == addressbitsize, "Not matching memory"
        assert elf.header.e_type in ['ET_DYN', 'ET_EXEC']
        cpu = self.cpu

        #Get interpreter elf
        interpreter = None
        for elf_segment in elf.iter_segments():
            if elf_segment.header.p_type != 'PT_INTERP':
                continue
            # The segment data is the interpreter path; [:-1] drops its
            # last byte (presumably the terminating NUL)
            interpreter = ELFFile(file(elf_segment.data()[:-1]))
            break
        if not interpreter is None:
            assert interpreter.get_machine_arch() == elf.get_machine_arch()
            assert interpreter.header.e_type in ['ET_DYN', 'ET_EXEC']

        #Stack Executability: taken from the PF_X bit of PT_GNU_STACK
        executable_stack = False
        for elf_segment in elf.iter_segments():
            if elf_segment.header.p_type != 'PT_GNU_STACK':
                continue
            if elf_segment.header.p_flags & 0x01:
                executable_stack = True
            else:
                executable_stack = False
            break

        base = 0
        elf_bss = 0
        end_code = 0
        end_data = 0
        elf_brk = 0
        load_addr = 0

        for elf_segment in elf.iter_segments():
            if elf_segment.header.p_type != 'PT_LOAD':
                continue

            align = 0x1000 #elf_segment.header.p_align

            # Offset of the segment start within its page; sizes and
            # addresses below are widened so the mapping is page aligned
            ELF_PAGEOFFSET = elf_segment.header.p_vaddr & (align-1)

            flags = elf_segment.header.p_flags
            memsz = elf_segment.header.p_memsz + ELF_PAGEOFFSET
            offset = elf_segment.header.p_offset - ELF_PAGEOFFSET
            filesz = elf_segment.header.p_filesz + ELF_PAGEOFFSET
            vaddr = elf_segment.header.p_vaddr - ELF_PAGEOFFSET
            memsz = self.mem._ceil(memsz+1) # (memsz + align ) & ~(align-1)
            # Position independent images get a fixed load base
            if base == 0 and elf.header.e_type == 'ET_DYN':
                assert vaddr == 0
                if addressbitsize == 32:
                    base = 0x56555000
                else:
                    base = 0x555555554000

            #PF_X   0x1 Execute
            #PF_W   0x2 Write
            #PF_R   0x4 Read
            #base = cpu.mem.mmap(base+vaddr,memsz,flags&0x4,flags&0x2,flags&0x1,data) - vaddr
            perms = ['   ', '  x', ' w ', ' wx', 'r  ', 'r x', 'rw ', 'rwx'][flags&7]
            hint = base+vaddr
            if hint == 0:
                hint = None
            base = self.mem.mmapFile(hint,memsz,perms,elf_segment.stream.name,offset) - vaddr
            logger.debug("Loading elf offset: %08x addr:%08x %08x %s" %(offset, base+vaddr, base+vaddr+memsz, perms))

            if load_addr == 0 :
                load_addr = base + vaddr

            k = base + vaddr + filesz
            if k > elf_bss :
                elf_bss = k
            # Code ends at the end of the last *executable* segment, so the
            # test must use PF_X (0x1), not PF_R (0x4)
            if (flags & 0x1) and end_code < k: #PF_X
                end_code = k
            if end_data < k:
                end_data = k
            k = base + vaddr + memsz
            if k > elf_brk:
                elf_brk = k

        elf_entry = elf.header.e_entry
        if elf.header.e_type == 'ET_DYN':
            elf_entry += load_addr
        entry = elf_entry
        real_elf_brk = elf_brk

        # We need to explicitly zero any fractional pages
        # after the data section (i.e. bss).  This would
        # contain the junk from the file that should not
        # be in memory
        #TODO:
        #cpu.write(elf_bss, '\x00'*((elf_bss | (align-1))-elf_bss))

        logger.debug("Zeroing main elf fractional pages. From %x to %x.", elf_bss, elf_brk)
        logger.debug("Main elf bss:%x"%elf_bss)
        logger.debug("Main elf brk %x:"%elf_brk)

        self.mem.mprotect(self.mem._floor(elf_bss), elf_brk-elf_bss, 'rw')
        for i in xrange(elf_bss, elf_brk):
            try:
                self.mem.putchar(i, '\x00')
            except Exception as e:
                # Best effort: a failing write is logged, not fatal
                logger.debug("Exception zeroing main elf fractional pages: %s"%str(e))
Ejemplo n.º 54
0
class User:
    """Load an ELF or Mach-O executable and emulate it with Unicorn.

    The constructor only parses the file.  ``run`` creates the Unicorn
    instance (``self.mu``), maps the segments, installs the hooks and starts
    emulation; every helper that touches ``self.mu`` is therefore only
    usable after ``run`` has created it.
    """

    def __init__(self, exe):
        """Parse the executable at path ``exe``.

        Populates ``arch``, ``info``, ``entry``, ``symtab`` and either
        ``elf`` or ``macho`` depending on the file magic.

        Raises:
            NotImplementedError: the magic is not recognised, no
                little-endian Mach-O header is present, or ``ARCH_INFO``
                has no entry for the detected architecture.
        """
        # uses StringIO so we don't burn the file descriptor
        with open(exe, 'rb') as f:
            self.fp = StringIO(f.read())
        magic = self.fp.read(4).encode('hex')
        self.fp.seek(0)
        self.elf = None
        self.macho = None
        self.arch = None
        self.info = None
        self.symtab = None
        self.entry = None
        if magic == '7f454c46':
            self.elf = ELFFile(self.fp)
            self.arch = self.elf.get_machine_arch()
            self.entry = self.elf['e_entry']
            self.symtab = self.elf.get_section_by_name('.symtab')
            self.info = ARCH_INFO.get(self.arch)
        elif magic in ('cafebabe', 'feedface', 'feedfacf', 'cefaedfe', 'cffaedfe'):
            macho = FileMachO(exe, self.fp)
            for header in macho.headers:
                if header.endian == '<':
                    self.macho = header
                    self.arch = mach_o.CPU_TYPE_NAMES.get(header.header.cputype)
                    self.arch = ARCH_MAP.get(self.arch, self.arch)
                    self.info = ARCH_INFO.get(self.arch)
                    if not self.info:
                        # Indexing self.info below would raise TypeError;
                        # leave the loop so the unsupported-arch check
                        # after this if/elif chain reports the problem.
                        break
                    for lc, cmd, data in header.commands:
                        # entry point
                        if lc.cmd == mach_o.LC_MAIN or lc.cmd == mach_o.LC_UNIXTHREAD:
                            # NOTE(review): ``ip`` is presumably the offset
                            # of the instruction pointer inside the command
                            # payload - confirm against the thread-state
                            # layout (and for LC_MAIN).
                            if self.info['bits'] == 64:
                                ip = 2 * 4 + 16 * 8
                                self.entry = struct.unpack(header.endian + 'Q', data[ip:ip+8])[0]
                            else:
                                ip = 2 * 4 + 10 * 4
                                self.entry = struct.unpack(header.endian + 'L', data[ip:ip+4])[0]
                    break
            else:
                raise NotImplementedError('Could not find suitable MachO arch.')
        else:
            raise NotImplementedError('Unrecognized file magic: %s' % magic)

        if not self.info:
            raise NotImplementedError('Unsupported Unicorn arch: %s' % self.arch)
        self.bits = self.info['bits']
        # Size in bytes of one address / stack slot.
        self.bsz = self.bits // 8
        self.sp = self.info['sp']
        self.regs = REG_MAP.get(self.arch, [])
        # (addr, size) pairs recorded by mem_map().
        self.memory = []
        # Last register values shown by print_changed_regs().
        self.saved_regs = {}

    # start Unicorn helpers

    def mapped(self, addr, size):
        """Return ``(start, end)`` of the first tracked region overlapping
        ``[addr, addr + size)``, or ``False`` when nothing overlaps."""
        for a, b in self.memory:
            # Entries are stored as (addr, size); turn size into an end.
            b += a
            if addr < a and addr + size > a:
                return (a, b)
            if addr >= a and addr < b:
                return (a, b)
        return False

    def mem_map(self, addr, size):
        """Map ``size`` bytes at ``addr``, trimming against one existing map.

        When the request overlaps a tracked region it is shrunk to the part
        lying before or after that region; if it starts inside the region
        and does not extend past its end, nothing is mapped and ``None`` is
        returned.  Otherwise returns the result of ``self.mu.mem_map``.
        """
        # TODO: this tracking could be replaced by a Unicorn api to get memory map
        # FIXME: if you overlap with the end of an existing map it will silently fail
        mapped = self.mapped(addr, size)
        if mapped:
            a, b = mapped
            if addr < a:
                size = a - addr
            elif addr < b and addr + size > b:
                right = addr + size
                addr = b
                size = right - addr
            else:
                return
        addr, size = align(addr, size, grow=True)
        self.memory.append((addr, size))
        return self.mu.mem_map(addr, size)

    def mmap(self, size, addr_hint=0):
        """Map ``size`` bytes at the first free address at or above
        ``addr_hint`` (``BASE`` when the hint is falsy) and return it.

        Raises:
            MemoryError: no free address was found below ``2 ** 32``.
        """
        if not addr_hint:
            addr_hint = BASE
        _, size = align(0, size, grow=True)
        addr_hint, size = align(addr_hint, size)
        for addr in xrange(addr_hint, 2 ** 32, UC_MEM_ALIGN):
            if not self.mapped(addr, size):
                # FIXME: why is this broken without size + 1
                self.mem_map(addr, size + 1)
                return addr
        else:
            raise MemoryError('could not allocate %d bytes' % size)

    def push(self, n):
        """Decrement the stack pointer by one slot and store ``n`` there."""
        sp = self.reg_read(self.sp)
        self.reg_write(self.sp, sp - self.bsz)
        self.mem_write(sp - self.bsz, self.pack_addr(n))

    def pop(self):
        """Read the value at the stack pointer, advance the stack pointer by
        one slot and return the value."""
        # The original referenced an undefined local ``sp`` here and raised
        # NameError on every call; read the register once and reuse it.
        sp = self.reg_read(self.sp)
        data = self.mem_read(sp, self.bsz)
        self.reg_write(self.sp, sp + self.bsz)
        return self.unpack_addr(data)

    def mem_write(self, addr, data):
        """Write ``data`` to emulated memory at ``addr``."""
        return self.mu.mem_write(addr, data)

    def mem_read(self, addr, size):
        """Read ``size`` bytes of emulated memory starting at ``addr``."""
        return self.mu.mem_read(addr, size)

    def mem_read_cstr(self, addr):
        """Read a NUL-terminated string at ``addr``, four bytes at a time."""
        # FIXME: this might be buggy
        s = ''
        while not '\0' in s:
            s += self.mu.mem_read(addr, 4)
            addr += 4
        return str(s.split('\0', 1)[0])

    def reg_write(self, reg, n):
        """Write ``n`` to register ``reg``."""
        return self.mu.reg_write(reg, n)

    def reg_read(self, reg):
        """Return the current value of register ``reg``."""
        return self.mu.reg_read(reg)

    def mem_hex(self, addr, size):
        """Return ``size`` bytes at ``addr`` hex-encoded and passed through
        ``spaces`` with a group width of one address (``bsz * 2`` digits)."""
        data = binascii.hexlify(self.mem_read(addr, size))
        return spaces(data, self.bsz * 2)

    def read_regs(self):
        """Return ``[(enum, name, value), ...]`` for every known register."""
        return [(enum, name, self.reg_read(enum)) for enum, name in self.regs]

    def print_regs(self, regs=None):
        """Print ``regs`` (default: all registers), four per output line."""
        if regs is None:
            regs = self.read_regs()
        # Python 2 print statements: a bare ``print`` emits a newline and a
        # trailing comma suppresses it.
        for i, (enum, name, val) in enumerate(regs):
            if i % 4 == 0 and i > 0:
                print
            print ('%3s=0x%08x' % (name, val)),
        print

    def print_changed_regs(self):
        """Print the registers whose value differs from the last call and
        remember the new values."""
        regs = self.read_regs()
        changed = [(enum, name, val)
                   for enum, name, val in regs
                   if self.saved_regs.get(enum) != val]
        self.print_regs(changed)
        for enum, name, val in changed:
            self.saved_regs[enum] = val

    def print_dis(self, addr, size):
        """Print the result of ``disas`` for ``size`` bytes at ``addr``."""
        mem = self.mem_read(addr, size)
        print(disas(mem, addr, self.info))

    def pack_addr(self, n):
        """Pack ``n`` as a little-endian 64-bit or 32-bit unsigned value,
        depending on ``self.bits``."""
        if self.bits == 64:
            return struct.pack('<Q', n)
        else:
            return struct.pack('<I', n)

    def unpack_addr(self, data):
        """Inverse of ``pack_addr``: decode one little-endian address."""
        if self.bits == 64:
            n, = struct.unpack('<Q', data)
        else:
            n, = struct.unpack('<I', data)
        return n

    # end Unicorn helpers

    def symbolicate(self, addr):
        """Return ``'name+0xNN'`` for the function symbol containing
        ``addr`` with the smallest offset, or ``'0x<addr>'`` when there is
        no symbol table or no function symbol covers the address."""
        if self.symtab:
            # offset-from-symbol-start -> symbols at that offset
            matches = defaultdict(list)
            for sym in self.symtab.iter_symbols():
                val = sym['st_value']
                size = sym['st_size']
                if sym['st_info']['type'] == 'STT_FUNC' and val <= addr and val + size > addr:
                    matches[addr - val].append(sym)
            if matches:
                # TODO pick the smallest matching symbol?
                # or indicate when you're inside multiple symbols?
                dist = sorted(matches.keys())[0]
                sym = matches[dist][0]
                return '%s+0x%02x' % (sym.name, dist)
        return '0x%x' % addr

    def map_segments(self):
        """Map the file's segments into the emulator, copy their contents,
        then allocate the stack and initialise the stack pointer."""
        if self.elf:
            for s in self.elf.iter_segments():
                # NOTE(review): uses p_paddr and does not filter on the
                # segment type - confirm that is intended.
                addr, size = s['p_paddr'], s['p_memsz']
                if not size:
                    continue
                self.mem_map(addr, size)
                self.mem_write(addr, s.data())
        elif self.macho:
            for lc, cmd, data in self.macho.commands:
                if lc.cmd in (mach_o.LC_SEGMENT, mach_o.LC_SEGMENT_64):
                    # Restore the stream position after reading the data.
                    c = self.fp.tell()
                    for seg in data:
                        self.fp.seek(seg.offset)
                        sd = self.fp.read(seg.size)
                        self.mem_map(seg.addr, seg.size)
                        self.mem_write(seg.addr, sd)
                    self.fp.seek(c)
        self.stack = self.mmap(STACK_SIZE, STACK_BASE)
        self.reg_write(self.sp, self.stack + STACK_SIZE - self.bsz)

    def write_argv(self, argv):
        """Copy the ``argv`` strings into a fresh mapping and push their
        addresses onto the stack.

        A 0 terminator is pushed first, then the argument addresses from
        last to first.  Returns the base address of the string mapping.
        """
        size = sum([len(a) + 1 for a in argv])
        argv_addr = self.mmap(size)
        pos = argv_addr + size
        addrs = []
        # Strings are laid out back to front; each gets len + 1 bytes, the
        # extra byte being left as whatever the fresh mapping contains.
        for arg in reversed(argv):
            asz = len(arg) + 1
            self.mem_write(pos - asz, arg)
            pos -= asz
            addrs.append(pos)
        for addr in [0] + addrs:
            self.push(addr)
        return argv_addr

    # hooks

    def hook_mem_invalid(self, uc, access, address, size, value, user_data):
        """On an invalid write, map 2 MiB at the faulting address and return
        True; for any other access return False to stop emulation."""
        if access == UC_MEM_WRITE:
            print(">>> Memory fault on WRITE at 0x%x, data size = %u, data value = 0x%x" % (address, size, value))
            self.mem_map(address, 2 * 1024 * 1024)
            return True
        else:
            # stop emulation
            return False

    def hook_intr(self, mu, intno, user_data):
        """Service a small set of x64 syscalls by forwarding them to the
        host ``os`` module; exit the process on anything unsupported."""
        # NOTE(review): 80 is decimal here; the x86 "int 0x80" vector would
        # be 128 - confirm which value Unicorn reports.
        if intno == 80:
            if self.arch == 'x64':
                regs = [X86_REG_RAX, X86_REG_RDI, X86_REG_RSI, X86_REG_RDX, X86_REG_R10, X86_REG_R8, X86_REG_R9]
                num, a1, a2, a3, a4, a5, a6 = [self.reg_read(r) for r in regs]
                ret = 0
                if num == 0: # SYS_read
                    tmp = os.read(a1, a3)
                    self.mem_write(a2, tmp + '\0')
                    ret = len(tmp)
                elif num == 1: # SYS_write
                    ret = os.write(a1, self.mem_read(a2, a3))
                elif num == 2: # SYS_open
                    ret = os.open(self.mem_read_cstr(a1), a2, a3)
                elif num == 3: # SYS_close
                    os.close(a1)
                elif num == 8: # SYS_lseek
                    ret = os.lseek(a1, a2, a3)
                elif num == 9: # SYS_mmap
                    ret = self.mmap(a2, addr_hint=a1)
                elif num == 11: # SYS_munmap
                    pass
                elif num == 60: # SYS_exit
                    sys.exit(a1)
                else:
                    print('Unsupported syscall: %s' % (num,))
                    sys.exit(1)
                self.reg_write(X86_REG_RAX, ret)
            else:
                print('Arch not supported.')
                sys.exit(1)

    def hook_block(self, uc, address, size, user_data):
        """Print the symbolicated block address and the changed registers."""
        name = self.symbolicate(address)
        print(">>> Basic block at %s, block size = 0x%x <<<" % (name, size))
        self.print_changed_regs()

    def hook_code(self, uc, addr, size, user_data):
        """Print the disassembly of the instruction being executed; exit
        when the reported instruction size exceeds 128 bytes."""
        if size > 128:
            print('Makeshift SIGILL')
            sys.exit(1)
        print('>'),
        self.print_dis(addr, size)

    def hook_mem_access(self, uc, access, addr, size, value, user_data):
        """Print one line describing a memory read or write."""
        if access == UC_MEM_WRITE:
            print('W @0x%x 0x%x = 0x%x' % (addr, size, value))
        else:
            print ('R @0x%x 0x%x =' % (addr, size)), self.mem_hex(addr, size)

    def run(self, *argv):
        """Create the emulator, load the program, push ``argv`` onto the
        stack and start emulation at the entry point."""
        self.mu = Uc(self.info['ucarch'], self.info['ucbits'])
        self.map_segments()
        # self.mu.hook_add(UC_HOOK_BLOCK, self.hook_block)
        # self.mu.hook_add(UC_HOOK_CODE, self.hook_code)
        self.mu.hook_add(UC_HOOK_INTR, self.hook_intr)
        self.mu.hook_add(UC_HOOK_MEM_INVALID, self.hook_mem_invalid)
        # self.mu.hook_add(UC_HOOK_MEM_READ_WRITE, self.hook_mem_access)

        # put argv into target memory
        self.push(0) # envp
        argv_addr = self.write_argv(argv)
        self.push(len(argv)) # argc
        argv_size = sum([len(a) + 1 for a in argv]) + self.bsz * (len(argv) + 1)
        print('[argv] %s' % (self.mem_hex(argv_addr, argv_size),))

        print('[entry point]')
        self.print_dis(self.entry, 64)
        print('[initial stack] %s' % (self.mem_hex(self.reg_read(self.sp), 64),))

        print('=====================================')
        print('==== Program output begins here. ====')
        print('=====================================')
        self.mu.emu_start(self.entry, 0)
Ejemplo n.º 55
0
Archivo: ELF.py Proyecto: dagit/capdl
class ELF(object):
    """Thin wrapper around an elftools ``ELFFile`` that can describe the
    pages and CapDL objects needed to load the file."""

    def __init__(self, elf, name='', arch=None):
        """
        Wrap an ELF file.

        'elf' is either a path (any string type) that is opened in binary
        mode, or an already open stream of ELF data. 'name' is only used
        when generating CapDL from the ELF file. 'arch' overrides the
        architecture reported by the file when it is truthy.
        """
        stream = open(elf, 'rb') if isinstance(elf, six.string_types) else elf
        self._elf = ELFFile(stream)
        self.name = name
        # Lazily built name -> symbol map, see _get_symbol().
        self._symtab = None
        self.arch = arch if arch else self.get_arch()

    def get_entry_point(self):
        """Return the 'e_entry' field of the ELF header."""
        entry = self._elf['e_entry']
        return entry

    def _get_symbol(self, symbol):
        """
        Look up 'symbol' by name; returns the symbol object or None.

        Raises Exception when the fallback path is needed and the file has
        no '.symtab' section.
        """
        # If possible, let elftools do all the work.
        if hasattr(self._elf, 'get_symbol_by_name'):
            # From 46ae4bd this functionality is in elftools; from 9da4c45
            # the result is a list, of which the first entry is used.
            found = self._elf.get_symbol_by_name(symbol)
            return found[0] if isinstance(found, list) else found

        if self._symtab is None:
            section = self._elf.get_section_by_name('.symtab')
            if not section:
                # This ELF file has been stripped.
                raise Exception('No symbol table available')
            self._symtab = dict((s.name, s) for s in section.iter_symbols())

        return self._symtab.get(symbol)

    def get_symbol_vaddr(self, symbol):
        """Return the 'st_value' of 'symbol', or None if it is not found."""
        entry = self._get_symbol(symbol)
        return entry['st_value'] if entry else None

    def get_symbol_size(self, symbol):
        """Return the 'st_size' of 'symbol', or None if it is not found."""
        entry = self._get_symbol(symbol)
        return entry['st_size'] if entry else None

    def _safe_name(self):
        """
        Return the name with every character outside A-Z, a-z and 0-9
        replaced by an underscore, as the CapDL tools parse other
        characters differently.
        """
        unsafe = re.compile(r'[^A-Za-z0-9]')
        return unsafe.sub('_', self.name)

    def get_arch(self):
        """Return the machine architecture reported by elftools."""
        machine = self._elf.get_machine_arch()
        return machine

    def get_pages(self, infer_asid=True, pd=None, use_large_frames=True):
        """
        Build and return the PageCollection holding every page required to
        ELF load this file. Each page is added with its base virtual
        address and the read, write and execute permissions of the
        PT_LOAD segment it belongs to.
        """
        pages = PageCollection(self._safe_name(), self.arch, infer_asid, pd)

        # Various CAmkES output sections we are expecting to see in the ELF.
        TYPE = {"ignore": 1, "shared": 2, "persistent": 3, "guarded": 4}
        special = re.compile("^(ignore_|shared_|persistent|guarded)")
        sections = [s for s in self._elf.iter_sections()
                    if special.match(_decode(s.name))]

        for seg in self._elf.iter_segments():
            # Only loadable segments that occupy memory produce pages.
            if seg['p_type'] != 'PT_LOAD' or seg['p_memsz'] == 0:
                continue

            # Start with one untyped region spanning the whole segment and
            # carve the special sections out of it one at a time.
            regions = [{'addr': seg['p_vaddr'],
                        'size': seg['p_memsz'],
                        'type': 0}]
            for sec in filter(seg.section_in_segment, sections):
                start = sec['sh_addr']
                holders = [r for r in regions
                           if r['addr'] <= start < r['addr'] + r['size']]
                assert len(holders) == 1
                holder = holders[0]
                orig_size = holder['size']
                # Shrink the region to the range preceding this section.
                holder['size'] = start - holder['addr']
                # Append a region for this section itself...
                regions.append({'addr': start,
                                'size': sec['sh_size'],
                                'type': TYPE[_decode(sec.name).split('_')[0]]})
                # ...and one for whatever follows this section.
                regions.append({'addr': start + sec['sh_size'],
                                'size': orig_size - holder['size'] - sec['sh_size'],
                                'type': 0})
            # Remove empty regions.
            regions = [r for r in regions if r['size'] != 0]

            flags = seg['p_flags']
            readable = (flags & P_FLAGS.PF_R) > 0
            writable = (flags & P_FLAGS.PF_W) > 0
            executable = (flags & P_FLAGS.PF_X) > 0

            # Allocate pages
            for reg in regions:
                end = reg['addr'] + reg['size']
                if reg['type'] in (1, 2, 3, 4):
                    # A range that must be backed by small pages.
                    vaddr = round_down(reg['addr'])
                    while vaddr < end:
                        pages.add_page(vaddr, readable, writable, executable)
                        vaddr += PAGE_SIZE
                    continue

                # A range that is eligible for promotion.
                candidates = list(reversed(page_sizes(self.arch)))
                vaddr = round_down(reg['addr'])
                remain = end - vaddr
                while vaddr < end:
                    size = PAGE_SIZE
                    if use_large_frames:
                        # Take the first candidate that both fits in what
                        # is left and is aligned at the current address.
                        for candidate in candidates:
                            if remain >= candidate and vaddr % candidate == 0:
                                size = candidate
                                break
                    pages.add_page(vaddr, readable, writable, executable, size)
                    vaddr += size
                    remain -= size

        return pages

    def get_spec(self, infer_tcb=True, infer_asid=True, pd=None,
            use_large_frames=True):
        """
        Return a CapDL spec with as much information as can be derived from
        the ELF file in isolation. When 'infer_tcb' is set, a single TCB
        starting at the entry point is added to the spec.
        """
        pages = self.get_pages(infer_asid, pd, use_large_frames)
        spec = pages.get_spec()
        if not infer_tcb:
            return spec

        # Create a single TCB.
        tcb_name = 'tcb_%s' % self._safe_name()
        tcb = TCB(tcb_name, ip=self.get_entry_point(), elf=self.name)
        spec.add_object(tcb)
        tcb['vspace'] = pages.get_page_directory()[1]
        return spec

    def __repr__(self):
        """Return the string form of the wrapped ELFFile."""
        return str(self._elf)
Ejemplo n.º 56
0
def parse(srcfile):
	"""Locate and patch one conditional branch inside the encrypted payload
	of the shared object ``srcfile``.

	The file is parsed as ELF, two encrypted segments stored in its
	SHT_LOUSER section are decrypted with ``decryptSegment``, the code
	reached from the DT_INIT entry is disassembled, and the first branch
	that follows the expected compare pattern is rewritten in place in
	``srcfile`` (x86: opcode byte becomes 0xeb; ARM: the last byte of the
	instruction becomes 0xea).

	Raises:
		ValueError: a required ELF structure is missing, the architecture
			is neither 'x86' nor 'ARM', or the branch cannot be found.
	"""
	# The original never closed this handle.
	with open(srcfile,'rb') as f:
		_parse_stream(f, srcfile)


def _patch_file_byte(srcfile, filepos, oldbyte, newbyte):
	"""Overwrite the byte at ``filepos`` in ``srcfile`` with ``newbyte``
	after checking that the file currently holds ``oldbyte`` there."""
	with open(srcfile,'r+b') as wf:
		wf.seek(filepos)
		cmpbyte = wf.read(1)
		assert cmpbyte == oldbyte
		wf.seek(filepos)
		wf.write(newbyte)


def _parse_stream(f, srcfile):
	"""Do the work of ``parse`` on the already opened binary stream ``f``."""
	e = ELFFile(f)
	arch = e.get_machine_arch()
	print("Current so is under  %s" % (arch,))

	dynamic_table = e.get_section_by_name(".dynamic")
	inittag = None
	for tags in dynamic_table.iter_tags():
		if tags["d_tag"] == 'DT_INIT':
			inittag = tags
			break
	if inittag is None:
		raise ValueError("no DT_INIT tag in .dynamic of %s" % srcfile)
	# In the dynamic entry d_ptr and d_val are the two members of a union.
	print("Entry Point : 0x%x"%(inittag["d_ptr"]))

	# The last section whose file range covers d_ptr holds the entry code;
	# the last SHT_LOUSER section holds the encrypted data.
	entrysection = None
	datasection = None
	for sections in e.iter_sections():
		if sections.header["sh_offset"] <= inittag["d_ptr"] <= sections.header["sh_offset"]+ sections.header["sh_size"]:
			entrysection = sections
		if sections.header.sh_type== 'SHT_LOUSER':
			datasection = sections
	if entrysection is None:
		raise ValueError("no section contains the DT_INIT address in %s" % srcfile)
	if datasection is None:
		raise ValueError("no SHT_LOUSER section in %s" % srcfile)

	entrysectiondata = entrysection.data()
	encrypteddata = datasection.data()
	print("LOUSER offset:0x%x"%(datasection.header.sh_offset))
	#should i search firstush move and find -0x4 byte -> find the start of info
	secondsegraw_offset = struct.unpack("<I",entrysectiondata[0x1c:0x1c+0x4])[0]
	secondsegraw_size = struct.unpack("<I",entrysectiondata[0x1c+0x4:0x1c+0x8])[0]

	print(hex(secondsegraw_offset +datasection.header.sh_offset ))
	print(hex(secondsegraw_size))

	secondsegraw = encrypteddata[secondsegraw_offset :secondsegraw_offset+secondsegraw_size ]
	secondsegdecrypted = "".join(decryptSegment(secondsegraw))

	# First byte of a decrypted segment: size of the additional header.
	jmpsize = ord(secondsegdecrypted[0])

	#eachseginfosize = 0xc # src 0x4 dst 0x4 srcsize 0x2 dstsize 0x2
	siginfostart = struct.unpack("<I",secondsegdecrypted[2*0x4:2*0x4+0x4])[0]

	#x86  data in the [1] seg, code in the [2] seg  #arm  data in the [0] seg, code in the [1] seg
	#but we can just find data seg [-3] for offset data seg [-2] for size
	# dataseg[-6] is next entry with memoff(next's memoff)
	if arch == 'x86':
		dataseg_index = 1
	elif arch == 'ARM':
		dataseg_index = 0
	else:
		raise ValueError("unsupported machine arch: %s" % (arch,))
	(src,dst,srcsize,dstsize) = struct.unpack("<IIHH",secondsegdecrypted[siginfostart+ dataseg_index*0xc:siginfostart+(dataseg_index+1)*0xc])

	dataseg_end = jmpsize+src+srcsize
	(todo_offset , todo_size)= struct.unpack("<II",secondsegdecrypted[dataseg_end - 3*0x4:dataseg_end - 0x4])
	todo_entry = struct.unpack("<I",secondsegdecrypted[dataseg_end - 6*0x4:dataseg_end - 5*0x4])[0]
	print("Find val:0x%x"%(todo_offset))
	print("Find val:0x%x"%(todo_size))
	print("Entry :0x%x"%(todo_entry))
	todo_raw = encrypteddata[todo_offset :todo_offset+todo_size ]
	todo_decrypted = "".join(decryptSegment(todo_raw))

	todojmpsize = ord(todo_decrypted[0])
	segcount = struct.unpack("<I",todo_decrypted[3*0x4:3*0x4+0x4])[0]
	seginfostart = struct.unpack("<I",todo_decrypted[2*0x4:2*0x4+0x4])[0]
	# Pick the segment whose destination range contains the entry; when
	# none matches, src/dst keep the values of the last entry examined.
	for n in range(segcount):
		(src,dst,srcsize,dstsize) = struct.unpack("<IIHH",todo_decrypted[seginfostart+ n*0xc:seginfostart+(n+1)*0xc])
		if  dst <= todo_entry <= dst+dstsize:
			print("src:0x%x dst:0x%x srcsize:0x%x dstsize:0x%x"%(src,dst,srcsize,dstsize))
			print("code in this seg")
			break

	entry_code = todo_decrypted[todojmpsize+todo_entry - (dst-src): src+srcsize+todojmpsize]
	if arch == 'x86':
		cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
		# push ebp; mov ebp, esp ... call rel32 ... leave; ret
		# NOTE(review): '.' does not match 0x0a without re.DOTALL - confirm
		# that is acceptable for binary input.
		tmp1 = re.findall('(\x55\x89\xe5.*\xe8....).*\xc9\xc3?', entry_code)[0]
		ret_marker = '\xc3'
		fixbyte = 0xeb
		operand_offset = 0
	else:
		cs = capstone.Cs(capstone.CS_ARCH_ARM,capstone.CS_MODE_ARM)
		#push	{fp, lr} --> bl xxx --> pop	{fp, pc}
		tmp1 = re.findall('(\x00\x48\x2d\xe9.*...\xeb).*\x00\x88\xbd\xe8?', entry_code)[0]
		ret_marker = '\x00\x88\xbd\xe8'
		fixbyte = 0xea # beq(0a) -> b(ea)
		operand_offset = 0x3 # cause operand in th end
	# The last instruction of the matched prologue is the call whose
	# target is the function to patch.
	call = None
	for call in cs.disasm(tmp1,todo_entry ):
		print("0x%x:\t%s\t%s" %(call.address, call.mnemonic, call.op_str))
	if call is None:
		raise ValueError("could not disassemble the entry function")
	if arch == 'x86':
		next_func_entry = int(call.op_str,16)
	else:
		next_func_entry = int(call.op_str[1:],16) #remove leading '#'
	next_func_end = todo_decrypted[next_func_entry+todojmpsize:].find(ret_marker)
	print("next func entry :%x ends :%d"%(next_func_entry,next_func_end))
	# NOTE(review): the slice start is rebased by (dst-src) but the end is
	# not - kept exactly as in the original, confirm it is intended.
	next_func_text = todo_decrypted[next_func_entry - (dst-src)+todojmpsize:todojmpsize+next_func_entry + next_func_end+len(ret_marker)]

	prev2 = None
	prev1 = None
	for i in cs.disasm("".join(next_func_text),next_func_entry):
		print("0x%x:\t%s\t%s\t" %(i.address, i.mnemonic, i.op_str))
		# prev1/prev2 are None for the first instructions; guard before
		# reading .id (the original raised AttributeError here).
		if arch == 'x86':
			hit = (prev2 is not None and i.id == capstone.x86.X86_INS_JE
				and prev2.id ==capstone.x86.X86_INS_MOV and prev1.id == capstone.x86.X86_INS_CMP)
		else:
			hit = (prev1 is not None and i.id == capstone.arm.ARM_INS_B
				and prev1.id == capstone.arm.ARM_INS_CMP)
		if hit:
			print("Found")
			break
		prev2 = prev1
		prev1 = i
	else:
		# The original fell through and patched whatever instruction was
		# disassembled last; refuse to touch the file instead.
		raise ValueError("branch to patch not found in %s" % srcfile)

	modifytosegoffset = todojmpsize+ i.address  - (dst-src) + operand_offset
	print("MemAddress:%x"%(i.address))
	print("Segoffset: %x"%( modifytosegoffset))
	(newbyte,oldbyte) = getEncryptedFixByte(todo_raw, modifytosegoffset, fixbyte)
	#write back to
	modifyrawfilepos = datasection.header.sh_offset + todo_offset+modifytosegoffset
	print("Modify raw file @0x%x = 0x%x + 0x%x + 0x%x" %(modifyrawfilepos, datasection.header.sh_offset , todo_offset, modifytosegoffset))
	_patch_file_byte(srcfile, modifyrawfilepos, oldbyte, newbyte)
	print("DONE!!!")
Ejemplo n.º 57
0
def main():
    """Pack the code and data sections of the x64 ELF object named by
    ``sys.argv[1]`` into a C source file, ``bot_opt.cc``.

    The generated file holds the section bytes as a page-aligned array, a
    list of offsets needing a load-time fix-up, and a ``load_code``
    function that applies the fix-ups and binds the exported symbols.
    """
    filename = sys.argv[1]
    # Binary mode, and closed when done: the original opened the file in
    # text mode through ``file()`` and never closed it.
    with open(filename, 'rb') as stream:
        _pack_elf(filename, ELFFile(stream))


def _to_c_array(s):
    """Format the bytes of ``s`` (zero-padded to a multiple of four) as
    rows of eight C hex literals."""
    if len(s) % 4 != 0:
        s += chr(0) * (4 - len(s) % 4)
    bs = [ord(c) for c in s]
    return ''.join(
        ' ' + ''.join(' 0x%02x,' % b for b in bs[i:i+8]) + '\n'
        for i in range(0, len(bs), 8))


def _to_c_array2(arr):
    """Format the integers of ``arr`` as rows of ten C decimal literals."""
    return ''.join(
        ' ' + ''.join(' %d,' % x for x in arr[i:i+10]) + '\n'
        for i in range(0, len(arr), 10))


def _pack_elf(filename, elf):
    """Do the work of ``main`` for the parsed ELF object ``elf``."""
    print('[II] Object %s is a %s_%s elf' % (filename, elf.get_machine_arch(), elf.elfclass))
    assert elf.elfclass == 64 and elf.get_machine_arch() == 'x64'

    print("[II] Elf has %d sections." % elf.num_sections())

    # Group by prefix so that the .bss sections end up last.
    selected_sections = []
    for section_prefix in ['.text', '.data', '.rodata', '.bss']:
        for section in elf.iter_sections():
            if section.name.startswith(section_prefix):
                selected_sections.append(section.name)

    # section name -> offset of that section inside the packed blob
    offsets = {}
    shellcode = StringIO('')
    for section_name in selected_sections:
        offsets[section_name] = shellcode.len
        try:
            s = elf.get_section_by_name(section_name)
            if s['sh_type'] == 'SHT_NOBITS':
                data = chr(0) * s['sh_size']
            else:
                data = s.data()
            print("[II] Section %s is %d bytes offset %d" % (section_name, len(data), offsets[section_name]))
        except Exception:
            # Best effort, as before, but no longer swallowing
            # KeyboardInterrupt / SystemExit like the bare except did.
            data = ''
            print('[WW] No %s section' % section_name)
        shellcode.write(data)
        # padding to 16 (an already aligned blob still gets 16 bytes)
        shellcode.write(chr(0) * (16-shellcode.len % 16))

    print("[II] Total packed data size %d" % shellcode.len)

    # Blob offsets of 32-bit absolute values that need the load address
    # added at run time.
    relocs = []
    for section_name in selected_sections:
        reloc_section = find_relocations_for_section(elf, section_name)
        if reloc_section is None:
            continue
        symtab = elf.get_section(reloc_section['sh_link'])
        for reloc in reloc_section.iter_relocations():
            assert elf.get_machine_arch() == 'x64' and reloc.is_RELA()
            reloc_base = offsets[section_name]
            reloc_offset = reloc['r_offset']
            reloc_type = reloc['r_info_type']
            target_symbol = symtab.get_symbol(reloc['r_info_sym'])
            target_name = elf.get_section(target_symbol['st_shndx']).name
            target_base = offsets[target_name]
            target_offset = target_symbol['st_value']

            shellcode.seek(reloc_base+reloc_offset)
            value = struct.unpack("<l",shellcode.read(4))[0]
            if reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_32']:
                value = target_base + target_offset + value + reloc['r_addend']
                relocs.append(reloc_base+reloc_offset)
                print("[II] Offset  %s added to reloc list" % (reloc_base+reloc_offset,))
            elif reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_PC32']:
                value = (target_base + target_offset) -  (reloc_base + reloc_offset) + value + reloc['r_addend']
            elif reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_32S']:
                value = target_base + target_offset + value+ reloc['r_addend']
                relocs.append(reloc_base+reloc_offset)
            else:
                assert reloc_type == ENUM_RELOC_TYPE_x64['R_X86_64_NONE']
            shellcode.seek(reloc_base + reloc_offset)
            shellcode.write(struct.pack("<L",value&0xffffffff))
        shellcode.seek(shellcode.len)

    with open('bot_opt.cc', 'w') as fp:
        bss = elf.get_section_by_name('.bss')
        bss_size = bss['sh_size'] if bss is not None else 0
        packed = shellcode.getvalue()
        if bss_size:
            # The tail of the blob must be zero so it can be left out of
            # the initialiser.
            assert packed[-bss_size:] == chr(0) * bss_size
            initialized = packed[:-bss_size]
        else:
            # [-0:] / [:-0] would select everything / nothing.
            initialized = packed

        pagesize = 4096

        fp.write('''#include <sys/mman.h>
#include "bot_opt.h"

static unsigned char code[%d] __attribute__((aligned(4096))) = {
%s};
static int patch[] = {
%s};
''' % (
        (shellcode.len + pagesize-1) // pagesize * pagesize,
        _to_c_array(initialized),
        _to_c_array2(relocs),
        ))

        fp.write('''
void load_code() {
  if ((uintptr_t)code > 0xffffffffull)
    return;
  mprotect(code, sizeof(code), PROT_READ|PROT_WRITE|PROT_EXEC);
  for (unsigned int i = 0; i < sizeof(patch)/sizeof(patch[0]); i++) {
    *(uint32_t*)(void*)(code + patch[i]) += (uintptr_t)code;
  }
''')

        # export symbols
        # Index the symbol table once; a later duplicate name wins, which
        # is what the original per-entry scan did.
        symbols = {}
        for s in elf.get_section_by_name('.symtab').iter_symbols():
            symbols[s.name] = s
        for entry in (
                'root_search_move',
                'init_bot',
                'max_lookahead',
                'maybe_dead_threshold',
                'search_threshold',
                'cache1_clear',
                ):
            symbol = symbols.get(entry)
            assert symbol
            section = elf.get_section(symbol['st_shndx']).name
            base = offsets[section]
            offset = symbol['st_value']

            start = base + offset
            print('%s %s %s' % (section, entry, start))
            if section == '.text':
                fp.write('  %s_func = (%s_func_t)((char*)code + %d);\n' % (
                    entry, entry, start))
            else:
                fp.write('  %s_ptr = (%s_ptr_t)((char*)code + %d);\n' % (
                    entry, entry, start))

        fp.write('''}
''')
Ejemplo n.º 58
0
class Disassembler(object):
    """Load an ELF file, disassemble it with capstone and patch it in memory.

    Loadable segments are kept in ``self.memory`` as a list of
    ``(vaddr, file_offset, memsize, data)`` tuples, where ``data`` is the
    segment's bytes as read from the file.  Segment data is handled as a
    plain ``str`` (see ``writeMemory``), i.e. Python 2 byte-string semantics.

    NOTE(review): ``loadELF`` calls ``self.isThumb`` and
    ``self.get_tag_cpu_arch``, which are not defined in this class body as
    shown here -- confirm they are provided elsewhere.
    """

    def __init__(self, filename):
        """Remember *filename* and load its segments, symbols and .text."""
        self.filename = filename
        # Per-instance segment list.  A class-level list would be shared by
        # every Disassembler and accumulate segments of unrelated files.
        self.memory = []
        self.loadELF(filename)

    def readMemory(self, address, size):
        """Return *size* bytes of loaded segment data starting at *address*.

        A falsy *size* (``None`` or ``0``) returns everything up to the end
        of the segment's data.  Returns ``""`` when no segment covers
        *address*.
        """
        for vaddr, foffset, memsize, mem in self.memory:
            if vaddr <= address <= vaddr + memsize:
                start = address - vaddr
                if size:
                    return mem[start:start + size]
                return mem[start:]
        return ""

    def writeMemory(self, address, data):
        """Overwrite loaded segment data at virtual *address* with *data*.

        The address is first translated to a file offset; every segment
        whose file range covers that offset is patched.  Items that fall
        past the end of the stored data are appended and grow the segment's
        recorded size.  Nothing happens when the address is unmapped
        (``addr2offset`` returns -1, which matches no segment with a
        non-negative file offset).
        """
        offset = self.addr2offset(address)
        for idx, (vaddr, foffset, memsize, mem) in enumerate(self.memory):
            if foffset <= offset <= foffset + memsize:
                patched = list(mem)
                pos = offset - foffset
                for i in range(0, len(data)):
                    if pos + i < len(patched):
                        patched[pos + i] = data[i]
                    else:
                        patched.append(data[i])
                        memsize += 1
                self.memory[idx] = (vaddr, foffset, memsize, ''.join(patched))

    def addr2offset(self, address):
        """Translate a virtual address to a file offset, or -1 if unmapped."""
        for vaddr, foffset, memsize, mem in self.memory:
            if vaddr <= address <= vaddr + memsize:
                return address - vaddr + foffset
        return -1

    def loadELF(self, filename):
        """Parse *filename* and fill segments, symbol tables and capstone state.

        Sets ``elf``, ``arch``, ``entry``, ``symtab`` (address -> function
        name), ``thumbtab`` (sorted ``(address, is_thumb)`` pairs built from
        the ARM ``$t``/``$a`` symbols), ``text``/``text_addr``/``text_size``
        and the capstone handle ``md`` (plus ``t_md`` for Thumb on ARM).

        Prints a message and exits the process when the file cannot be
        opened or parsed as ELF.  Raises ``KeyError`` for an architecture
        other than x86, x64 or ARM.
        """
        try:
            # The stream is handed to ELFFile and intentionally left open.
            self.elf = ELFFile(open(filename, 'rb'))
        except Exception:
            print("[-] It is not ELF file: "+filename)
            sys.exit()

        self.arch = self.elf.get_machine_arch()

        if self.arch == 'ARM':
            self.arm_arch = self.get_tag_cpu_arch()

        # Load code segments
        align = 0x1000
        with open(filename, 'rb') as f:
            for elf_segment in self.elf.iter_segments():
                if elf_segment.header.p_type != 'PT_LOAD':
                    continue

                # Extend the segment downwards to the start of its page.
                ELF_PAGEOFFSET = elf_segment.header.p_vaddr & (align-1)

                memsz = elf_segment.header.p_memsz + ELF_PAGEOFFSET
                offset = elf_segment.header.p_offset - ELF_PAGEOFFSET
                filesz = elf_segment.header.p_filesz + ELF_PAGEOFFSET
                vaddr = elf_segment.header.p_vaddr - ELF_PAGEOFFSET
                # Rounds up to a page multiple; a size that is already
                # aligned still gains one extra page with this formula.
                memsz = (memsz + align ) & ~(align-1)

                f.seek(offset, 0)
                data = f.read(filesz)
                self.memory.append((vaddr, offset, memsz, data))

        self.entry = self.elf.header.e_entry

        # Load symbol table
        self.symtab = dict()
        self.thumbtab = list()
        for section in self.elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            for symbol in section.iter_symbols():
                sym_type = symbol['st_info']['type']
                if sym_type == 'STT_FUNC':
                    if self.isThumb(symbol['st_value']):
                        # Key Thumb functions by value - 1.
                        self.symtab[symbol['st_value'] - 1] = symbol.name
                    else:
                        self.symtab[symbol['st_value']] = symbol.name
                elif self.arch == 'ARM' and sym_type == 'STT_NOTYPE':
                    if symbol.name == '$t':  # Thumb
                        self.thumbtab.append((symbol['st_value'], True))
                    elif symbol.name == '$a':   #ARM
                        self.thumbtab.append((symbol['st_value'], False))

        self.thumbtab.sort(key=lambda tup: tup[0])

        text_section = self.elf.get_section_by_name(b'.text')
        self.text = text_section.data()
        self.text_addr = text_section['sh_addr']
        self.text_size = text_section['sh_size']

        arch = {'x86':CS_ARCH_X86,'x64':CS_ARCH_X86, 'ARM':CS_ARCH_ARM}[self.arch]
        mode = {'x86':CS_MODE_32, 'x64':CS_MODE_64, 'ARM':CS_MODE_ARM}[self.arch]
        self.md = Cs(arch, mode)
        if self.arch == 'ARM':
            self.t_md = Cs(arch, CS_MODE_THUMB)

    def disasm(self, address, size=None):
        """Disassemble from *address*; return ``(instruction, is_thumb)`` pairs.

        On x86/x64, *size* bytes (or the rest of the segment data when
        *size* is falsy) are disassembled and ``is_thumb`` is always False.

        On ARM the low bit of *address* selects the initial Thumb state and
        the code is disassembled region by region up to each entry of
        ``thumbtab``, switching decoder at every entry.

        NOTE(review): on the ARM path *size* is ignored, nothing after the
        last ``thumbtab`` entry is disassembled, and the cursor is reset to
        every table entry even when that entry lies below *address* --
        confirm this is what callers expect.
        """
        if self.arch != 'ARM':
            return [(i, False) for i in self.md.disasm(self.readMemory(address, size), address)]

        disasms = []
        thumb = (address & 1) == 1
        address = address & -2
        for addr, isThumb in self.thumbtab:
            if address < addr:
                engine = self.t_md if thumb else self.md
                chunk = self.readMemory(address, addr-address)
                disasms.extend([(i, thumb) for i in engine.disasm(chunk, address)])
            address = addr
            thumb = isThumb
        return disasms

    def save(self):
        """Prompt for a file name and write the patched binary to it.

        The prompt is requested through ``signals.set_prompt``; the nested
        callbacks return the status text (or an overwrite-confirmation
        tuple of ``(question, callback, filename)``).
        """
        def saveBinary(filename):
            def saveBinaryYes(yn, filename):
                if yn == 'y':
                    try:
                        with open(self.filename, 'rb') as src:
                            original_binary = src.read()
                        # Start from the original file, then overlay every
                        # (possibly patched) segment at its file offset.
                        with open(filename, 'wb') as f:
                            f.write(original_binary)
                            for vaddr, foffset, memsize, mem in self.memory:
                                f.seek(foffset, 0)
                                f.write(mem)
                        os.chmod(filename, 0o755)
                        return "Successfully save to '%s'" % filename
                    except Exception as e:
                        return "Fail to save binary: "+str(e)

                return "Fail to save binary"

            if os.path.exists(filename):
                return (filename+" already exists, Overwrite?", saveBinaryYes, filename)
            else:
                return saveBinaryYes('y', filename)

        signals.set_prompt.send(self, text="Save to (filename): ", callback=saveBinary)
Ejemplo n.º 59
0
class ImageInfo(object):

    ##
    # Initialize internals.
    #
    #   @param ImagePath file path to image
    #
    def __init__(self, ImagePath):
        
        # internals
        self._Path = ImagePath
        self._Handle = None
        self._SizeBytes = 0
        self._Elf = None
        self._IsExecutable = False
        self._Sections = {}
        self._SectionsFast = None
        self._Segments = []
        self._Strings = {}
        self._Symbols = {}
        self._TextInstructions = {}
        self._PLTInstructions = {}
        self._SymbolsFast = None
        self._TextSection = None
        self._PLTSection = None
        self._SymbolTable = None
        self._StringTable = None

    ##
    # Get infos about the given image.
    #
    #   @return none
    #
    def parseImage(self):
        """Parse the ELF image at ``self._Path`` and fill all lookup tables.

        Populates segments, addressed sections, the special sections
        (.text, .plt, symbol and string table), the string and symbol
        dictionaries, the numpy tables used by getSection/getSymbol and the
        disassembled .text/.plt instructions.  .symtab/.strtab are used
        when both are present, otherwise .dynsym/.dynstr.

        The file handle is stored in ``self._Handle`` and is not closed
        here.

        Raises Exception when a required section or table is missing, when
        sections or symbols overlap inconsistently, or when the machine
        architecture is not x86, x64, ARM or AArch64.
        """

        # basic stats
        self._SizeBytes = os.path.getsize(self._Path)

        # open
        self._Handle = open(self._Path, 'rb')
        self._Elf = ELFFile(self._Handle)

        # executable
        if self._Elf['e_type'] == 'ET_EXEC':
            self._IsExecutable = True

        # header string table
        hdstrtbl = None
        for cnt, sec in enumerate(self._Elf.iter_sections()):
            if (sec['sh_type'] == 'SHT_STRTAB') and (self._Elf['e_shstrndx'] == cnt):
                hdstrtbl = sec
                break
        if hdstrtbl is None:
            raise Exception("[ERROR] Could not find header string table!")

        # register segments
        for seg in self._Elf.iter_segments():
            self._Segments.append(seg)

        # resolve every section name once; reused by the passes below
        named = [(hdstrtbl.get_string(sec['sh_name']), sec)
                 for sec in self._Elf.iter_sections()]

        # register sections (only those that have an address)
        for curname, sec in named:
            if (sec['sh_addr'] != 0):
                cursec = SectionInfo()
                cursec._Name = curname
                cursec._Addr = sec['sh_addr']
                cursec._Size = sec['sh_size']
                cursec._Obj = sec
                self._Sections[cursec._Addr] = cursec

        # special sections
        secnames = [curname for curname, sec in named if sec['sh_size'] > 0]
        if ('.text' not in secnames):
            raise Exception("[ERROR] No text section found!")
        if (('.symtab' not in secnames) and ('.dynsym' not in secnames)):
            raise Exception("[ERROR] No symbol table found!")
        if (('.strtab' not in secnames) and ('.dynstr' not in secnames)):
            raise Exception("[ERROR] No string table found!")
        usedebugtables = (('.symtab' in secnames) and ('.strtab' in secnames))

        # register special sections
        for curname, sec in named:
            if sec['sh_size'] > 0:
                cursec = SectionInfo()
                cursec._Name = curname
                cursec._Addr = sec['sh_addr']
                cursec._Size = sec['sh_size']
                cursec._Obj = sec
                if cursec._Name == '.text':
                    self._TextSection = cursec
                elif cursec._Name == '.plt':
                    self._PLTSection = cursec
                elif (cursec._Name == '.symtab') and usedebugtables:
                    self._SymbolTable = cursec
                elif (cursec._Name == '.strtab') and usedebugtables:
                    self._StringTable = cursec
                elif (cursec._Name == '.dynsym') and not usedebugtables:
                    self._SymbolTable = cursec
                elif (cursec._Name == '.dynstr') and not usedebugtables:
                    self._StringTable = cursec

        # sanity check
        if (self._TextSection is None):
            raise Exception("[ERROR] Could not assign text section!")
        if (self._PLTSection is None):
            raise Exception("[ERROR] Could not assign plt section!")
        if (self._SymbolTable is None):
            raise Exception("[ERROR] Could not assign symbol table!")
        if (self._StringTable is None):
            raise Exception("[ERROR] Could not assign string table!")

        # parse strings
        # Search for the terminators in the raw bytes: offsets found in a
        # decoded copy are character offsets and no longer line up with the
        # byte table once a name contains a multi-byte character.
        binstr = self._StringTable._Obj.data()
        curstart = 0
        for cmatch in re.finditer(b'\x00', binstr):
            curstr = binstr[curstart:cmatch.start()].decode("utf-8")
            if curstr != "":
                self._Strings[curstart] = curstr
            curstart = cmatch.start() + 1
        self._Strings[0] = ''

        # register symbols
        for symb in self._SymbolTable._Obj.iter_symbols():
            if (symb['st_value'] != 0) and \
               (symb['st_info']['type'] != 'STT_SECTION') and \
               (symb['st_info']['type'] != 'STT_FILE') and \
               (symb['st_info']['type'] != 'STT_NOTYPE') and \
               (symb['st_info']['bind'] != 'STB_LOCAL'):

                # new symbol
                cursymb = SymbolInfo()
                cursymb._Name = symb.name
                cursymb._Addr = symb['st_value']
                cursymb._Size = symb['st_size']
                cursymb._Type = symb['st_info']['type']
                cursymb._Obj = symb

                # fix name
                if cursymb._Name == '':
                    cursymb._Name = '0x%08x' % cursymb._Addr

                # safe add: symbols sharing a start address are merged into
                # one entry, keeping the larger one as the owner
                if cursymb._Addr in self._Symbols:
                    oldsymb = self._Symbols[cursymb._Addr]
                    if sys.stdout.isatty():
                        print ("[INFO] Symbols with same start addr: new=%s and old=%s" \
                               % (cursymb._Name, oldsymb._Name))
                    if cursymb._Size == oldsymb._Size:
                        oldsymb._Name += ("+%s" % cursymb._Name)
                    elif cursymb._Size > oldsymb._Size:
                        cursymb._Name += ("+%s(len=%d)" % \
                                          (oldsymb._Name, oldsymb._Size))
                        self._Symbols[cursymb._Addr] = cursymb
                    elif cursymb._Size < oldsymb._Size:
                        oldsymb._Name += ("+%s(len=%d)" % \
                                          (cursymb._Name, cursymb._Size))
                else:
                    self._Symbols[cursymb._Addr] = cursymb

        # prune overlay functions: a symbol that starts inside its
        # predecessor and ends at the same address is folded into it
        ksort = sorted(self._Symbols.keys())
        krem = []
        for i in range(0, len(ksort)-1):
            curend = self._Symbols[ksort[i]]._Addr + self._Symbols[ksort[i]]._Size
            nxtend = self._Symbols[ksort[i+1]]._Addr + self._Symbols[ksort[i+1]]._Size
            if (curend > self._Symbols[ksort[i+1]]._Addr) and (curend == nxtend):
                krem.append((ksort[i], ksort[i+1]))
        # When three or more symbols share an end address, the owner of a
        # later pair has already been removed; follow it to the survivor
        # instead of failing with a KeyError.
        survivor = {}
        for k in krem:
            root = survivor.get(k[0], k[0])
            if sys.stdout.isatty():
                print ("[INFO] Pruning overlay function %s." % self._Symbols[k[1]]._Name)
            self._Symbols[root]._Name += ("+%s(%d)" % \
                                          (self._Symbols[k[1]]._Name, k[1]-root))
            self._Symbols.pop(k[1])
            survivor[k[1]] = root

        # fast access
        # Scalar fields: the (name, type, 1) spelling is deprecated in numpy.
        fastdtype = numpy.dtype([('Start', numpy.uintp), ('Size', numpy.uintp)])
        self._SectionsFast = numpy.zeros(len(self._Sections), dtype=fastdtype)
        for i, key in enumerate(sorted(self._Sections.keys())):
            self._SectionsFast[i]['Start'] = self._Sections[key]._Addr
            self._SectionsFast[i]['Size'] = self._Sections[key]._Size
        self._SymbolsFast = numpy.zeros(len(self._Symbols), dtype=fastdtype)
        for i, key in enumerate(sorted(self._Symbols.keys())):
            self._SymbolsFast[i]['Start'] = self._Symbols[key]._Addr
            self._SymbolsFast[i]['Size'] = self._Symbols[key]._Size

        # consistency check
        for i in range(0, len(self._SectionsFast)-1):
            if self._SectionsFast[i]['Start'] + self._SectionsFast[i]['Size'] > \
               self._SectionsFast[i+1]['Start']:
                raise Exception('[ERROR] Inconsistent section placement!')
        for i in range(0, len(self._SymbolsFast)-1):
            if self._SymbolsFast[i]['Start'] + self._SymbolsFast[i]['Size'] > \
               self._SymbolsFast[i+1]['Start']:
                raise Exception('[ERROR] Inconsistent symbol placement: %s -> %s!' % \
                                (self._Symbols[self._SymbolsFast[i]['Start']]._Name, \
                                 self._Symbols[self._SymbolsFast[i+1]['Start']]._Name))

        # set up disassembler
        machine = self._Elf.get_machine_arch().lower()
        if 'x64' in machine:
            md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
        elif 'x86' in machine:
            md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
        elif 'arm' in machine:
            md = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
        elif 'aarch64' in machine:
            md = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM + \
                             capstone.CS_MODE_V8)
        else:
            raise Exception("[ERROR] Image architecture currently not supported!")
        md.skipdata = True

        # parse .text section
        instructions = md.disasm_lite(self._TextSection._Obj.data(), \
                                      self._TextSection._Addr)
        for (address, size, mnemonic, op_str) in instructions:
            self._TextInstructions[address] = (size, "%s\t%s" % (mnemonic, op_str))

        # parse .plt instructions
        instructions = md.disasm_lite(self._PLTSection._Obj.data(), \
                                      self._PLTSection._Addr)
        for (address, size, mnemonic, op_str) in instructions:
            self._PLTInstructions[address] = (size, "%s\t%s" % (mnemonic, op_str))

    ##
    # Get section from given address.
    #
    #   @param Address address within image
    #   @return the section of the address (None if error)
    #
    def getSection(self, Address):
        
        # find
        idx = numpy.argwhere(self._SectionsFast[:]['Start'] <= Address).flatten()
        if len(idx) == 0:
            return None
        
        # check
        if Address < self._SectionsFast[idx[-1]]['Start'] + \
           self._SectionsFast[idx[-1]]['Size']:
            return (self._Sections[self._SectionsFast[idx[-1]]['Start']])
        else:
            return None

    ##
    # Get symbol from given address.
    #
    #   @param Address address within image
    #   @return the symbol of the address (None if error)
    #
    def getSymbol(self, Address):
        
        # find
        idx = numpy.argwhere(self._SymbolsFast[:]['Start'] <= Address).flatten()
        if len(idx) == 0:
            return None
        
        # check
        if Address < self._SymbolsFast[idx[-1]]['Start'] + \
           self._SymbolsFast[idx[-1]]['Size']:
            return (self._Symbols[self._SymbolsFast[idx[-1]]['Start']])
        else:
            return None

    ##
    # Get instruction from given address.
    #
    #   @param Address address within image
    #   @return size of instr. and assembly code (None if error)
    #
    def getInstruction(self, Address):
        
        # get section
        sec = self.getSection(Address)
        if sec is None:
            return None
  
        # search
        if sec._Name == '.text':
            if Address in self._TextInstructions.keys():
                return (self._TextInstructions[Address])
        elif sec._Name == '.plt':
            if Address in self._PLTInstructions.keys():
                return (self._PLTInstructions[Address])
        
        # error
        return None