Ejemplo n.º 1
0
def process_file(filename):
    """Print a short summary of every compile unit found in an ELF file.

    For each compile unit the section offset, the unit length, the tag of
    the top DIE and that DIE's DW_AT_name are written to stdout. A file
    without DWARF data only produces a notice.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if elf.has_dwarf_info():
            # The DWARFInfo object returned by get_dwarf_info() is the entry
            # point for walking the compile units of .debug_info.
            for unit in elf.get_dwarf_info().iter_CUs():
                # Header fields of a compile unit are read by item lookup.
                summary = (unit.cu_offset, unit['unit_length'])
                print('  Found a compile unit at offset %s, length %s' % summary)

                # The first DIE of a compile unit describes the unit itself.
                root = unit.get_top_DIE()
                print('    Top DIE with tag=%s' % root.tag)

                # DW_AT_name comes back as an AttributeValue; its value is
                # converted with bytes2str before printing.
                unit_name = root.attributes['DW_AT_name']
                print('    name=%s' % bytes2str(unit_name.value))
        else:
            print('  file has no DWARF info')
Ejemplo n.º 2
0
def process_file(filename):
    """Collect function, global, type and global-access maps from an ELF file.

    Every compile unit's top DIE is handed first to die_info_rec_struct and
    then to die_info_rec, which fill the shared maps.

    Returns (func_map, global_map, type_map, global_access_map) when the
    file has DWARF info.
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            # NOTE(review): this path returns a 2-tuple while the normal path
            # returns a 4-tuple -- confirm what callers expect.
            return {}, {}

        # Result maps shared by all compile units.
        func_map = {}
        global_map = {}
        type_map = {}
        struct_map = {}
        global_access_map = {}

        # get_dwarf_info() is the starting point for DWARF processing; each
        # compile unit is walked from its top DIE.
        for unit in elf.get_dwarf_info().iter_CUs():
            root = unit.get_top_DIE()
            # Per-unit scratch dicts handed to the recursive walkers.
            variables = {}
            members = {}

            die_info_rec_struct(root, struct_map, members, global_access_map)
            # Walk the DIEs recursively starting with the top DIE.
            die_info_rec(root, func_map, global_map, type_map, struct_map,
                         variables, global_access_map)

        return func_map, global_map, type_map, global_access_map
Ejemplo n.º 3
0
 def __init__(self, filename):
     """Store *filename* and parse the DWARF address ranges of that ELF file.

     Raises:
         AssertionError: if the file carries no DWARF information. It is
             raised explicitly (not via ``assert``) so the check is not
             stripped when Python runs with ``-O``.
     """
     self.filename = filename
     with open(filename, 'rb') as fp:
         elf = ELFFile(fp)
         if not elf.has_dwarf_info():
             raise AssertionError(
                 "No DWARF information for '{}'".format(filename))
         self.parseAddressRanges(elf.get_dwarf_info())
Ejemplo n.º 4
0
def load_dwarf_info(mmap):
    """ Load or reload all dwarf info from mmap.

    Iterating ``mmap`` must yield file names. Names starting with "[" and
    files without DWARF info are skipped. For every other file an entry is
    stored in ``all_dwarf_info`` under the file name, holding the DWARFInfo
    object ("dwarfinfo") and a list with one dict per compile unit ("units").
    """
    for filename in mmap:
        if filename.startswith("["):
            continue
        # The file is closed as soon as its data has been collected instead
        # of leaking one handle per mapped file.
        # NOTE(review): assumes the stored DWARFInfo stays usable after the
        # file is closed (pyelftools copies debug sections into memory) --
        # confirm for the pyelftools version in use.
        with open(filename, "rb") as stream:
            elffile = ELFFile(stream)
            if not elffile.has_dwarf_info():
                continue

            dwarfinfo = elffile.get_dwarf_info()
            # Information from Compilation Units (CUs)
            cus = []
            for cu in dwarfinfo.iter_CUs():
                lineprog = dwarfinfo.line_program_for_CU(cu)
                # A CU may have no line program; treat that as "no entries".
                entries = lineprog.get_entries() if lineprog is not None else []
                states = [
                    entry.state for entry in entries
                    if entry.state and not entry.state.end_sequence
                ]
                addresses = [state.address for state in states]
                dies = []
                for die in cu.iter_DIEs():
                    if die.tag != 'DW_TAG_subprogram':
                        continue
                    # Subprogram DIEs are not guaranteed to carry DW_AT_name;
                    # record None rather than failing with KeyError.
                    name_attr = die.attributes.get('DW_AT_name')
                    dies.append({
                        "entry": die,
                        "bounds": die_bounds(die),
                        "name": None if name_attr is None else name_attr.value
                    })
                cus.append({
                    "lineprog": lineprog,
                    "states": states,
                    "addresses": addresses,
                    "entries": dies
                })
        all_dwarf_info[filename] = {"dwarfinfo": dwarfinfo, "units": cus}
Ejemplo n.º 5
0
 def test_range_list_absence(self):
     """range_lists() is None for arm_with_form_indirect.elf, which has DWARF info."""
     elf_path = os.path.join('test', 'testfiles_for_unittests',
                             'arm_with_form_indirect.elf')
     with open(elf_path, 'rb') as stream:
         elf = ELFFile(stream)
         self.assertTrue(elf.has_dwarf_info())
         range_lists = elf.get_dwarf_info().range_lists()
         self.assertIsNone(range_lists)
Ejemplo n.º 6
0
 def _get_impalad_dwarf_info(self):
   """
   Read the impalad_path ELF binary, which is supposed to contain DWARF, and read the
   DWARF to understand the compiler options. Return a 2-tuple of the two useful DIE
   attributes of the first compile unit: the DW_AT_name and DW_AT_producer. If
   something goes wrong doing this, log a warning; every value that could not be read
   is returned as None.
   """
   # Some useful references:
   # - be/CMakeLists.txt
   # - gcc(1), especially -grecord-gcc-switches, -g, -ggdb, -gdwarf-2
   # - readelf(1)
   # - general reading about DWARF
   # A useful command for exploration without having to wade through many bytes is:
   # readelf --debug-dump=info --dwarf-depth=1 impalad
   # The DWARF lines are long, raw, and nasty; I'm hesitant to paste them here, so
   # curious readers are highly encouraged to try the above, or read IMPALA-3501.
   die_name = None
   die_producer = None
   try:
     with open(self.impalad_path, 'rb') as fh:
       impalad_elf = ELFFile(fh)
       if impalad_elf.has_dwarf_info():
         dwarf_info = impalad_elf.get_dwarf_info()
         # We only need the first CU, hence the unconventional use of the iterator
         # protocol.
         first_cu = next(dwarf_info.iter_CUs())
         top_die = first_cu.get_top_DIE()
         die_name = top_die.attributes['DW_AT_name'].value
         die_producer = top_die.attributes['DW_AT_producer'].value
   except Exception as e:
     # Deliberately best effort: any failure (unreadable file, no compile unit,
     # missing attribute) is only logged. Logger.warn() is a deprecated alias, so
     # warning() is used.
     LOG.warning('Failure to read DWARF info from {0}: {1}'.format(self.impalad_path,
                                                                   str(e)))
   return die_name, die_producer
    def generate_header(self, data_out_filename, glob_data_out, namesp_out):
        """Write the generated-file preamble, then let one HeaderGenCU per
        compile unit of every object file emit its part of the header.

        Raises NoDwarfInfoError as soon as an object file without DWARF
        information is encountered.
        """
        banner = "/* generated by userspace-header-gen.py */\n"
        glob_data_out.write(banner)
        glob_data_out.write("#include <rtems/linkersets.h>\n")

        namesp_out.write(banner)

        for obj in self._objfiles:
            elf = ELFFile(obj)
            if not elf.has_dwarf_info():
                raise NoDwarfInfoError()

            # DWARF sections are deliberately not relocated: it is not needed
            # here and causes problems on ARM with pyelftools 0.24.
            dwarf = elf.get_dwarf_info(relocate_dwarf_sections=False)

            for unit in dwarf.iter_CUs():
                if self._verbose >= VERBOSE_SOME:
                    message = 'Found a CU at offset %s, length %s\n' % (
                        unit.cu_offset, unit['unit_length'])
                    self._err.write(message)

                line_program = dwarf.line_program_for_CU(unit)
                per_unit = HeaderGenCU(unit, self._progname, line_program,
                                       self._err, self._verbose,
                                       self._filterre)
                per_unit.generate_header(data_out_filename, glob_data_out,
                                         namesp_out)
Ejemplo n.º 8
0
def resolve_addr(fn, ip):
    """Resolve address *ip* inside ELF file *fn*.

    Returns (loc, offset, src): the third field of the symbol-table hit
    and the distance of *ip* from that hit's first field, plus a
    "file:line" string built from the line-table hit. Each item is None
    when the corresponding lookup finds nothing.

    Parsed ELF files, line tables and symbol tables are cached per file
    name in the module-level open_files, lines and symtables mappings.
    """
    # Reuse an already parsed ELF file; otherwise open it and keep it cached.
    try:
        elffile = open_files[fn]
    except KeyError:
        elffile = ELFFile(open(fn, 'rb'))
        open_files[fn] = elffile

    # Line and symbol tables are built on first use only.
    if fn not in lines and elffile.has_dwarf_info():
        lines[fn] = build_line_table(elffile.get_dwarf_info())
    if fn not in symtables:
        symtables[fn] = build_symtab(elffile)

    loc = offset = None
    if fn in symtables:
        hit = find_le(symtables[fn], ip)
        if hit:
            loc = hit[2]
            offset = ip - hit[0]

    src = None
    if fn in lines:
        row = find_le(lines[fn], ip)
        src = "%s:%d" % (row[2], row[3]) if row else None

    return loc, offset, src
Ejemplo n.º 9
0
def process_file(filename, root, pkgroot):
    """Return the set collected from the line programs of an ELF file.

    Directories and unreadable paths give an empty set. If the file cannot
    be parsed as ELF, the result of find_in_source_root(filename, root,
    pkgroot) is returned instead. An OSError leaves whatever was collected
    so far.
    """
    found = set()
    readable_file = not os.path.isdir(filename) and os.access(filename, os.R_OK)
    if not readable_file:
        return found

    try:
        with open(filename, 'rb') as stream:
            try:
                elf = ELFFile(stream)
                if not elf.has_dwarf_info():
                    return found

                dwarf = elf.get_dwarf_info()
                for unit in dwarf.iter_CUs():
                    # A compile unit may or may not have a matching line
                    # program in .debug_line.
                    program = dwarf.line_program_for_CU(unit)
                    if program is not None:
                        found.update(line_entry_mapping(program))
            except ELFError:
                return find_in_source_root(filename, root, pkgroot)
    except OSError:
        pass
    return found
Ejemplo n.º 10
0
def get_functions(lib_path):
    """Scan the symbol tables of the ELF file at *lib_path*.

    Returns:
        (functions, offset, data): the dict filled by scan_section for the
        .symtab and .dynsym sections, plus the file offset and raw bytes of
        the .text section (both None when there is no .text section).
        Returns None if anything goes wrong; the error is printed.
    """
    functions = {}
    data = None
    offset = None
    try:
        from elftools.elf.elffile import ELFFile

        with open(lib_path, 'rb') as stream:
            elffile = ELFFile(stream)

            # Missing DWARF info is only reported; the symbol scan goes on.
            if not elffile.has_dwarf_info():
                print('file has no DWARF info')

            scan_section(functions, elffile, lib_path,
                         elffile.get_section_by_name('.symtab'))
            scan_section(functions, elffile, lib_path,
                         elffile.get_section_by_name('.dynsym'))

            # .text section hex dump
            section = elffile.get_section_by_name('.text')
            if section:
                data = section.data()
                offset = section['sh_offset']

        return functions, offset, data

    except Exception as e:
        # Best-effort helper: report where the failure happened and give up.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # Single-argument print(...) is valid on both Python 2 and Python 3;
        # the former print statement was a SyntaxError on Python 3.
        print("[%s, %s, %s] Error extracting functions: %s" % (
            exc_type, fname, exc_tb.tb_lineno, str(e)))
        return None
Ejemplo n.º 11
0
def process_file(filename):
    """Print offset, length, top-DIE tag and full path of each compile unit.

    A file without DWARF info only produces a notice on stdout.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if elf.has_dwarf_info():
            # get_dwarf_info() yields the DWARFInfo context object used for
            # all DWARF processing; its compile units are visited in turn.
            for unit in elf.get_dwarf_info().iter_CUs():
                # Header fields of a compile unit are read by item lookup.
                offset = unit.cu_offset
                length = unit['unit_length']
                print('  Found a compile unit at offset {0!s}, length {1!s}'.format(
                    offset, length))

                # The first DIE in each compile unit describes it.
                root = unit.get_top_DIE()
                print('    Top DIE with tag={0!s}'.format(root.tag))

                # The unit's file name, as reported by get_full_path().
                print('    name={0!s}'.format(root.get_full_path()))
        else:
            print('  file has no DWARF info')
Ejemplo n.º 12
0
class DwarfInfo:
    """Address-to-source lookup backed by the DWARF line tables of a binary."""

    def __init__(self, exe):
        """Open *exe* and load its DWARF info.

        Raises:
            ValueError: if the binary has no DWARF info. (The code used to
                ``raise`` a plain string, which is itself a TypeError.)
        """
        self.binary = exe
        self.fd = open(exe, 'rb')
        try:
            self.elf_file = ELFFile(self.fd)

            if not self.elf_file.has_dwarf_info():
                raise ValueError('Binary contains no dwarf info section.')
            self.dwarf_info = self.elf_file.get_dwarf_info()
        except Exception:
            # Do not wait for garbage collection to release the handle.
            self.fd.close()
            raise

    def __del__(self):
        # fd is missing when open() failed in __init__.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            fd.close()

    def lookup(self, address):
        """Return (file_name, line) for the line-table row covering *address*.

        Raises:
            LookupError: if no row of any compile unit covers the address.
        """
        # iterate over the compile units(CUs)
        for CU in self.dwarf_info.iter_CUs():
            line_progs = self.dwarf_info.line_program_for_CU(CU)
            if line_progs is None:
                # This CU has no line program, so nothing to search.
                continue
            prev_state = None
            # debug-line parse the table like `dwarfdump -debug-line ./main`
            for entry in line_progs.get_entries():
                if entry.state is None:
                    continue
                if entry.state.end_sequence:
                    # A sequence ended; the next row starts a new range.
                    prev_state = None
                    continue
                if prev_state and prev_state.address <= address < entry.state.address:
                    # File numbers in the state are treated as 1-based here.
                    file_name = line_progs['file_entry'][prev_state.file -
                                                         1].name
                    line = prev_state.line
                    return file_name, line
                prev_state = entry.state
        raise LookupError('Could not find address')
Ejemplo n.º 13
0
def process_file(filename):
    """Print a summary of each compile unit and dump its DIE tree.

    For every compile unit the offset, length, top-DIE tag and full path
    are printed, then die_info_rec is called on the top DIE. A file without
    DWARF info only produces a notice on stdout.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if elf.has_dwarf_info():
            # The DWARFInfo context object is the starting point for all
            # DWARF-based processing; its compile units are visited in turn.
            for unit in elf.get_dwarf_info().iter_CUs():
                # Header fields of a compile unit are read by item lookup.
                header = (unit.cu_offset, unit['unit_length'])
                print('  Found a compile unit at offset %s, length %s' % header)

                # The top DIE is the root of this unit's DIE tree.
                root = unit.get_top_DIE()
                print('    Top DIE with tag=%s' % root.tag)

                # The unit's file name, as reported by get_full_path().
                print('    name=%s' % root.get_full_path())

                # Hand the root to the recursive DIE printer.
                die_info_rec(root)
        else:
            print('  file has no DWARF info')
Ejemplo n.º 14
0
def retrieve_pub_functions(binary):
    """Dump the .debug_pubnames table of an ELF file and return its names.

    Returns the list of names found, or -1 when the file has no DWARF info
    or get_pubnames() returns None.
    """
    with open(binary, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return -1

        # The DWARFInfo context object gives access to the pubnames table.
        pubnames = elf.get_dwarf_info().get_pubnames()
        if pubnames is None:
            print('ERROR: No .debug_pubnames section found in ELF.')
            return -1

        print('%d entries found in .debug_pubnames' % len(pubnames))

        # Dump every entry as a fixed-width table framed by rule lines.
        rule = '-' * 66
        print('Dumping .debug_pubnames table ...')
        print(rule)
        print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
        print(rule)
        symbols = []
        for name, entry in pubnames.items():
            symbols.append(name)
            print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs))
        print(rule)

        print(symbols)

        return symbols
Ejemplo n.º 15
0
    def __init__(self, binary: str):
        """Load symbols, compile units and ``__metal_serial_`` markers from an ELF file.

        Populates ``self.symbols``, ``self.compile_units`` and ``self.markers``.

        Raises:
            Exception: if the binary has no DWARF info, or if a
                ``__metal_serial_`` symbol has no line-table row at exactly
                its address.
        """
        with open(binary, "rb") as b:
            elffile = ELFFile(b)

            # Symbol table. Start from an empty list so a binary without any
            # symbol table section yields no markers instead of failing with
            # AttributeError further down. When several symbol table sections
            # exist, the last one wins (unchanged behaviour).
            self.symbols = []
            for section in elffile.iter_sections():
                if isinstance(section, SymbolTableSection):
                    self.symbols = [
                        Symbol(sym.name, sym['st_value'], sym['st_info']['type'])
                        for sym in section.iter_symbols()
                        if len(sym.name) > 0
                    ]

            if not elffile.has_dwarf_info():
                raise Exception("This tool needs gdb info.")

            dbg = elffile.get_dwarf_info()

            def file_entry_to_abs(file_entry, linep: LineProgram) -> str:
                # dir_index 0 is mapped to '.'; other indices are 1-based
                # positions in the line program's include_directory list.
                di = file_entry.dir_index
                if di > 0:
                    return path.join(linep['include_directory'][di-1].decode(), file_entry.name.decode())
                else:
                    return path.join('.', file_entry.name.decode())

            # Pair every compile unit with its line program once.
            cu_helper = [(cu, dbg.line_program_for_CU(cu)) for cu in dbg.iter_CUs()]

            self.compile_units = [
                CompileUnitInput(die.attributes['DW_AT_name'].value.decode(),
                                 die.attributes['DW_AT_comp_dir'].value.decode(),
                                 [file_entry_to_abs(fe, linep) for fe in linep['file_entry']])
                for cu, linep in cu_helper for die in cu.iter_DIEs() if die.tag == 'DW_TAG_compile_unit'
            ]

            # find compile units
            self.markers = []

            marker_symbols = [sym for sym in self.symbols
                              if sym.name.startswith('__metal_serial_')]

            # Index the line tables once instead of rescanning every line
            # program for each marker symbol. setdefault keeps the first row
            # seen per address, which is the row the former linear search
            # returned.
            first_row_at = {}
            if marker_symbols:
                for _cu, linep in cu_helper:
                    for entry in linep.get_entries():
                        if entry.state is not None:
                            first_row_at.setdefault(entry.state.address, (entry, linep))

            for msym in marker_symbols:
                hit = first_row_at.get(msym.address)
                if hit is None:
                    raise Exception('Could not find code location for {} at 0x{:x} - this is most likely due to missing gdb symbols.'.format(msym.name, msym.address))
                loc, linep = hit

                abs_file_entry = file_entry_to_abs(linep['file_entry'][loc.state.file - 1], linep)

                self.markers.append(Marker(
                    msym.name,
                    msym.address,
                    msym.symbol_type,
                    abs_file_entry,
                    loc.state.line,
                    loc.state.column
                ))
Ejemplo n.º 16
0
def process_file(filename):
    """Report every compile unit of an ELF file on stdout.

    Prints the unit's offset and length, the tag of its top DIE and the
    full path reported by that DIE. A file without DWARF info only produces
    a notice.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # All DWARF processing starts from the DWARFInfo context object.
        dwarf = elf.get_dwarf_info()

        for unit in dwarf.iter_CUs():
            # cu_offset is a computed attribute; header fields such as
            # unit_length are read by item lookup.
            where = unit.cu_offset
            size = unit['unit_length']
            print('  Found a compile unit at offset %s, length %s' %
                  (where, size))

            # The first DIE in each compile unit describes it.
            root = unit.get_top_DIE()
            print('    Top DIE with tag=%s' % root.tag)

            # The unit's file name, as reported by get_full_path().
            print('    name=%s' % root.get_full_path())
Ejemplo n.º 17
0
    def run(self):
        """Build the DWARF model step by step, reporting progress to the window.

        Every GUI call is scheduled through GLib.idle_add. The method returns
        early, without scheduling done_loading, once stop_requested is set.
        """
        elf = ELFFile(self.f)

        if not elf.has_dwarf_info():
            GLib.idle_add(self.window.display_error, "This file has no DWARF info.")
            return

        builder = DwarfModelBuilder(elf.get_dwarf_info(), self.verbose)
        total = builder.num_cus()
        steps = builder.build_step()

        # Pull from the generator until it hands back a truthy element;
        # every falsy step advances the progress fraction by one unit.
        done = 0
        result = next(steps)
        while not result:
            if self.stop_requested:
                return

            GLib.idle_add(self.window.load_progress, float(done) / total)
            done += 1
            result = next(steps)

        if not self.stop_requested:
            GLib.idle_add(self.window.done_loading, result)
Ejemplo n.º 18
0
 def test_range_list_presence(self):
     """range_lists() is not None for sample_exe64.elf, which has DWARF info."""
     elf_path = os.path.join('test', 'testfiles_for_unittests',
                             'sample_exe64.elf')
     with open(elf_path, 'rb') as stream:
         elf = ELFFile(stream)
         self.assertTrue(elf.has_dwarf_info())
         range_lists = elf.get_dwarf_info().range_lists()
         self.assertIsNotNone(range_lists)
    def run(self):
        """Load the DWARF model incrementally and notify the window.

        Progress, errors and the final element are all handed to the window
        through GLib.idle_add. Setting stop_requested makes the method
        return without scheduling done_loading.
        """
        elf = ELFFile(self.f)

        if not elf.has_dwarf_info():
            GLib.idle_add(self.window.display_error,
                          "This file has no DWARF info.")
            return

        dwarf = elf.get_dwarf_info()
        builder = DwarfModelBuilder(dwarf, self.verbose)
        cu_count = builder.num_cus()
        processed = 0

        step_iter = builder.build_step()
        element = next(step_iter)
        while True:
            if element:
                # A truthy step result is the finished element.
                break
            if self.stop_requested:
                return

            fraction = float(processed) / cu_count
            GLib.idle_add(self.window.load_progress, fraction)
            processed = processed + 1
            element = next(step_iter)

        if self.stop_requested:
            return

        GLib.idle_add(self.window.done_loading, element)
Ejemplo n.º 20
0
def get_frame_base(filename, pc, rebased_addr):
    """
    Look up the CFA offset of the call-frame row closest above an address.

    :param filename: name of the executable file
    :param pc: The address of the beginning of the function
    :param rebased_addr: Should be project.loader.memory.min_addr
    :return: the CFA offset of the decoded FDE row with the smallest pc that
             is >= pc - rebased_addr; 0 if there is no such row; None if the
             file has no DWARF info
    """
    target_loc = pc - rebased_addr
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarf = elf.get_dwarf_info()

        # Needed by the descriptions module to decode register names
        # contained in DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        # Upper bound used as the initial "best pc so far".
        best_pc = 1000000000000000000000
        frame_offset = 0
        fdes = (cfi for cfi in dwarf.EH_CFI_entries() if isinstance(cfi, FDE))
        for fde in fdes:
            for row in fde.get_decoded().table:
                # Keep the row with the lowest pc at or above the target;
                # on ties the first row seen wins.
                if target_loc <= row['pc'] < best_pc:
                    frame_offset = row['cfa'].offset
                    best_pc = row['pc']
        return frame_offset
Ejemplo n.º 21
0
def get_cfi(path):
    ''' Get the CFI entries from the ELF at the provided path.

    CFI_entries() is used when has_CFI() is true, otherwise
    EH_CFI_entries() when has_EH_CFI() is true. Returns None, after
    printing a short reason, when the file has no DWARF info, no CFI, or
    one of the handled errors occurs.
    '''
    try:
        with open(path, 'rb') as stream:
            elf = ELFFile(stream)

            if not elf.has_dwarf_info():
                print("No DWARF")
                return None

            info = elf.get_dwarf_info()
            if info.has_CFI():
                return info.CFI_entries()
            if info.has_EH_CFI():
                return info.EH_CFI_entries()

            print("No CFI")
            return None
    except ELFError:
        print("ELF Error")
    except DWARFError:
        print("DWARF Error")
    except PermissionError:
        print("Permission Error")
    except KeyError:
        print("Key Error")

    # Only reached after one of the handled errors.
    return None
Ejemplo n.º 22
0
def main():
    """Report type sizes and padding for the object file named by sys.argv[1].

    For every compile unit the recognised type DIEs are wrapped (Struct,
    Primitive, Typedef or Array), finalized and listed in a table; padded
    types are then listed with their padding. ``types`` is keyed by DIE
    offset and is shared across compile units, so each table also contains
    the types of the units processed before. Exits with status 1 when the
    file has no DWARF info.
    """
    with open(sys.argv[1], 'rb') as f:
        elf = ELFFile(f)
        if not elf.has_dwarf_info():
            print("Object file has no dwarf info!")
            sys.exit(1)

        types = {}

        # DIE tag -> wrapper class. Built once instead of once per DIE.
        common_types = {
            'DW_TAG_structure_type': Struct,
            'DW_TAG_class_type': Struct,
            'DW_TAG_base_type': Primitive,
            'DW_TAG_typedef': Typedef,
            'DW_TAG_array_type': Array,
        }

        # Build the DWARFInfo object a single time and reuse it.
        dwarf_info = elf.get_dwarf_info()

        for cu in dwarf_info.iter_CUs():
            cu_name = cu.get_top_DIE().attributes['DW_AT_name'].value.decode(
                'utf-8')
            print('\x1b[32m\x1b[1mProcessing %s\x1b[0m' % cu_name)

            # First, map top level types. The DIEs are collected up front so
            # wrappers are only constructed after the whole unit was walked.
            dies = list(cu.iter_DIEs())
            for current in dies:
                if current.tag not in common_types:
                    continue  # other tags are not processed
                offset = current.offset
                assert offset not in types
                types[offset] = common_types[current.tag](current)

            for t in types.values():
                t.finalize(types)

            header = '%-4s |\t%-100s |\t%s' % ('#', 'type', 'size')
            print(header)
            print('-' * len(header.expandtabs()))
            for o, t in types.items():
                color = '\x1b[31m\x1b[31m' if t.has_padding() else ''
                print('{:04x} |\t{color}{:100}\x1b[0m |\t{}'.format(
                    o, repr(t), t.byte_size, color=color))
            print('-' * len(header.expandtabs()))
            print()
            for t in types.values():
                if t.has_padding():
                    print(
                        "Found padded type '%s' at %s:%u" %
                        (t, cu_name,
                         t.source_object.attributes['DW_AT_decl_line'].value))
                    for p in t.get_padding_list():
                        print('\t%s' % p)
Ejemplo n.º 23
0
def process_file(debugfile, dwzfile, fast=False):
    """Return get_producer(debugfile, dwzfile, fast) for a debug file with DWARF info.

    Raises:
        AssertionError: if *debugfile* has no DWARF info. It is raised
            explicitly because a bare ``assert 0`` is stripped under
            ``python -O``, after which the function silently returned None.
    """
    elffile = ELFFile(debugfile)

    if not elffile.has_dwarf_info():
        raise AssertionError('debug file has no DWARF info')

    # this file itself has the DWARF information, must be my lucky day!
    return get_producer(debugfile, dwzfile, fast)
Ejemplo n.º 24
0
 def test(self, file):
     """Tell whether *file* holds DWARF debugging data.

     Returns False when the stream cannot be parsed as ELF.
     """
     try:
         parsed = ELFFile(file)
         has_dwarf = parsed.has_dwarf_info()
         if not has_dwarf:
             return has_dwarf
         return parsed.get_dwarf_info().has_debug_info
     except ELFError:
         return False
Ejemplo n.º 25
0
def create_module_from_ELF_DWARF_file(file: Union[str, bytes, int], query = dict()) -> Optional[Module]:
  """Build a Module from the DWARF info of the ELF file at *file*.

  Returns None when the file has no DWARF info. *query* is accepted but not
  used by this function.
  """
  stream = open(file, 'rb')
  keep_open = False
  try:
    elf_file = ELFFile(stream)
    if not elf_file.has_dwarf_info():
      return None
    module = Module()
    import_ELF_DWARF_into_module(elf_file, module)
    # NOTE(review): the stream is left open on success, as before, in case
    # the imported module still reads from the ELF file -- confirm whether it
    # can be closed here as well.
    keep_open = True
    return module
  finally:
    # Release the handle when there is no DWARF info or the import failed;
    # it used to leak on both paths.
    if not keep_open:
      stream.close()
Ejemplo n.º 26
0
def _get_dwarf_info(binary_path, relocate_dwarf_sections=True):
    """Return the DWARFInfo object of the ELF file at *binary_path*.

    *relocate_dwarf_sections* is passed through to get_dwarf_info().

    Raises RuntimeError when the file has no DWARF info.
    """
    with open(binary_path, 'rb') as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            return elf.get_dwarf_info(
                relocate_dwarf_sections=relocate_dwarf_sections)
        raise RuntimeError(f'{binary_path} has no DWARF info')
Ejemplo n.º 27
0
def process_file(filename):
    """Print the location information of every DIE attribute that has one.

    For each compile unit, all attributes of all DIEs are examined. For
    every attribute carrying location information the DIE's name, tag and
    the decoded location (a single expression or a location list) are
    printed. A file without DWARF info only produces a notice on stdout.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # DWARFInfo allows to iterate over the compile units contained in
            # the .debug_info section. CU is a CompileUnit object, with some
            # computed attributes (such as its offset in the section) and
            # a header which conforms to the DWARF standard. The access to
            # header elements is, as usual, via item-lookup.
            print('  Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))

            # A CU provides a simple API to iterate over all the DIEs in it.
            for DIE in CU.iter_DIEs():
                # Go over all attributes of the DIE. Each attribute is an
                # AttributeValue object (from elftools.dwarf.die), which we
                # can examine. Plain dict .values() is used so no Python 2/3
                # compatibility helper is needed.
                for attr in DIE.attributes.values():
                    # Check if this attribute contains location information
                    if not loc_parser.attribute_has_location(attr, CU['version']):
                        continue

                    # A DIE with a location is not guaranteed to have a
                    # DW_AT_name; print None instead of failing with KeyError.
                    name_attr = DIE.attributes.get('DW_AT_name')
                    var_name = None if name_attr is None else name_attr.value
                    print(' Varname:%s' % (var_name))
                    print('   DIE %s. attr %s.' % (DIE.tag, attr.name))
                    loc = loc_parser.parse_from_attribute(
                        attr, CU['version'])
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print('      %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo,
                                           indent='      '))
Ejemplo n.º 28
0
Archivo: elf.py Proyecto: f0wl/REHelper
def print_basic_info(filename: str) -> None:
    """Print a "Basic Information" table describing the ELF file *filename*.

    The table lists file size, architecture, ELF type, hashes, a VirusTotal
    link, whether DWARF info is present, entropy, the named sections with
    their sizes, and the entry point.
    """
    with open(filename, "rb") as handle:
        elffile = ELFFile(handle)  # ELF object

        # Hashes first; the VirusTotal URL is derived from the SHA256.
        fileMD5 = file_MD5sum(filename)
        filesha1 = file_sha1sum(filename)
        filesha256 = file_sha256sum(filename)
        fileSSDEEP = file_ssdeepsum(filename)
        vt_url = "https://www.virustotal.com/gui/file/" + filesha256
        # Fall back to the full URL when no short link comes back.
        vtlink = tinyurl(vt_url) or vt_url

        # One "<name> (<hex size>) " entry per named section, with a line
        # break after every fourth section index.
        chunks = []
        for index in range(elffile.num_sections()):
            section = elffile.get_section(index)
            if len(section.name) > 0:
                chunks.append("{}{} {}({}) ".format(
                    GREEN, section.name, RESET, hex(section.data_size)))
            if index > 0 and index % 4 == 0:
                chunks.append("\n")
        sections = "".join(chunks) or RED + "No sections found" + RESET

        # has debug info?
        if elffile.has_dwarf_info():
            debug = GREEN + "Yes" + RESET
        else:
            debug = RED + "No" + RESET

        info_table = [
            ["Filename:", filename],
            ["Filesize:", file_size(filename)],
            ["Filetype:",
             GREEN + "ELF " + str(elffile.get_machine_arch()) + RESET],
            ["Subsystem:",
             GREEN + describe_e_type(elffile.header['e_type']) + RESET],
            ["MD5: ", fileMD5],
            ["SHA1: ", filesha1],
            ["SHA256: ", filesha256],
            ["SSDEEP:", fileSSDEEP],
            ["VT link:", vtlink],
            ["Symbols:", debug],
            ["Entropy:", str(file_entropy(filename))],
            ["Sections:\n(with size)", sections],
            ["Entrypoint:", "{}".format(hex(elffile.header["e_entry"]))],
        ]

        table = AsciiTable(title="Basic Information", table_data=info_table)
        print("")
        print(table.table)
        print("")
Ejemplo n.º 29
0
    def __init__(self, stream: TraceStream, elf_file: io.IOBase, basedir: str=''):
        """Store the trace stream and load DWARF info from *elf_file*.

        Raises ValueError when the ELF file carries no DWARF info.
        """
        self.stream, self.basedir = stream, basedir
        self.cache = {}

        elf = ELFFile(elf_file)
        if elf.has_dwarf_info():
            self.dwarf = elf.get_dwarf_info()
        else:
            raise ValueError(f'ELF file {elf} has no DWARF info')
Ejemplo n.º 30
0
 def test_die_size(self):
     """Every child of each CU's top DIE in the fixture has size 3."""
     fixture = os.path.join('test', 'testfiles_for_unittests',
                            'trailing_null_dies.elf')
     with open(fixture, 'rb') as stream:
         elffile = ELFFile(stream)
         self.assertTrue(elffile.has_dwarf_info())
         # Walk the direct children of every compile unit's top DIE.
         children = (
             child
             for CU in elffile.get_dwarf_info().iter_CUs()
             for child in CU.get_top_DIE().iter_children())
         for child in children:
             self.assertEqual(child.size, 3)
Ejemplo n.º 31
0
    def test_dwarfv5_parses(self):
        """DWARFv5 info in the basic fixture is detected and can be fetched."""
        fixture = os.path.join(
            'test', 'testfiles_for_unittests', 'dwarfv5_basic.elf')
        with open(fixture, 'rb') as stream:
            elf = ELFFile(stream)

            # The file must be recognised as carrying DWARF info ...
            self.assertTrue(elf.has_dwarf_info())

            # ... and building the DWARFInfo object must yield something.
            self.assertIsNotNone(elf.get_dwarf_info())
Ejemplo n.º 32
0
def get_file_line_coverage(target_path, addr_counts):
    """Parse the DWARF info of *target_path* and hand it to ``_parse_info``.

    Returns whatever ``_parse_info(dwarf_info, addr_counts)`` returns.
    Raises CommandError when the ELF file has no DWARF info.
    """
    # ELF is a binary format: in text mode the stream yields decoded text
    # instead of the raw bytes ELFFile has to parse.
    with open(target_path, 'rb') as f:
        elf = ELFFile(f)

        if not elf.has_dwarf_info():
            raise CommandError(
                '%s has no DWARF info. Please recompile with ``-g``' %
                target_path)

        dwarf_info = elf.get_dwarf_info()
        return _parse_info(dwarf_info, addr_counts)
Ejemplo n.º 33
0
def fetch_lineno(bin_name, func_addrs):
    """Return the result of ``decode_file_line`` for *func_addrs*.

    An empty dict is returned (after printing a notice) when *bin_name*
    has no DWARF info.
    """
    with open(bin_name, "rb") as stream:
        elffile = ELFFile(stream)
        if elffile.has_dwarf_info():
            return decode_file_line(elffile.get_dwarf_info(), func_addrs)
        print("No Dwarf Found in ", bin_name)

    return {}
Ejemplo n.º 34
0
class LocateUndef(object):
    """ Locate symbols in the compilation units of an object file. For each
        symbol (given by its name string) the matching DIE object from
        pyelftools/dwarf/die is recorded together with the compilation unit
        it was found in.

        Public methods:

            findDies -- for a list of symbol names, collect the matching
                (CU, DIE) pairs

            getDies -- return the collected pairs, keyed by symbol name
    """

    def __init__(self, fname):
        """ fname:
                file name of object file
        """
        self.dies = {}
        # Defined up front so __del__ and findDies work on every path,
        # including a failed open() and a file without DWARF info.
        self.fh = None
        self.dwarfinfo = None

        # open() raises by itself on failure, so no falsy-handle check is
        # needed here.
        self.fh = open(fname, 'rb')
        self.elffile = ELFFile(self.fh)
        if self.elffile.has_dwarf_info():
            self.dwarfinfo = self.elffile.get_dwarf_info()

    def __del__(self):
        # getattr: __del__ may run on a partially constructed instance.
        fh = getattr(self, 'fh', None)
        if fh:
            fh.close()

    def findDies(self, namesList):
        """ namesList -- list of symbol names (strings). Each symbol is
            expected to appear once in namesList. Names that are found are
            removed from namesList (the caller's list is modified); the
            search stops as soon as the list is empty.

            Does nothing when the object file has no DWARF info.
        """
        if self.dwarfinfo is None:
            return

        for cu in self.dwarfinfo.iter_CUs():
            for die in cu.iter_DIEs():
                if die.is_null():
                    continue
                if 'DW_AT_name' not in die.attributes:
                    continue

                name = die.attributes['DW_AT_name'].value.decode('ascii')
                if name in namesList:
                    self.dies[name] = (cu, die)
                    # NOTE(review): the first DIE carrying the name wins;
                    # the original author flagged possible false
                    # definitions here -- still unverified.
                    namesList.remove(name)
                    if not namesList:
                        return

    def getDies(self):
        """ returns a dict mapping each found symbol name to its
            (CU, DIE) pair """
        return self.dies
Ejemplo n.º 35
0
    def read(self, view):
        """Add every sized structure type found in ``self.file`` to *view*.

        For each ``DW_TAG_structure_type`` DIE that has ``DW_AT_byte_size``,
        calls ``view.add(name, "Struct", size, members)`` where *members* is
        the list of named ``DW_TAG_member`` children. Structures without
        ``DW_AT_name`` are named ``"<decl_file>:<decl_line>"``.

        Prints a notice and returns when the file has no DWARF info.
        """
        self.log.info('Reading file %s', self.file)
        with open(self.file, "rb") as f:
            elffile = ELFFile(f)
            if not elffile.has_dwarf_info():
                print('  file has no DWARF info')
                return

            # get_dwarf_info returns a DWARFInfo context object, which is the
            # starting point for all DWARF-based processing in pyelftools.
            dwarfinfo = elffile.get_dwarf_info()

            for CU in dwarfinfo.iter_CUs():
                # CU is a CompileUnit object; header elements are accessed
                # via item-lookup.
                print('  Found a compile unit at offset %s, length %s' % (
                    CU.cu_offset, CU['unit_length']))

                for die in CU.iter_DIEs():
                    if die.tag != 'DW_TAG_structure_type':
                        continue

                    attrs = die.attributes
                    # Structs without a size are skipped; checking this
                    # first avoids building a name that is then discarded.
                    if 'DW_AT_byte_size' not in attrs:
                        continue
                    size = attrs['DW_AT_byte_size'].value

                    if 'DW_AT_name' in attrs:
                        name = attrs['DW_AT_name'].value.decode()
                    else:
                        name = "{}:{}".format(
                            attrs['DW_AT_decl_file'].value,
                            attrs['DW_AT_decl_line'].value
                        )

                    members = []
                    if die.has_children:
                        for child in die.iter_children():
                            if child.tag != 'DW_TAG_member':
                                continue
                            # Anonymous members (e.g. unnamed unions) carry
                            # no DW_AT_name; skip them instead of failing
                            # with KeyError.
                            if 'DW_AT_name' not in child.attributes:
                                continue
                            members.append(
                                child.attributes['DW_AT_name'].value.decode())

                    view.add(name, "Struct", size, members)
Ejemplo n.º 36
0
def load_dwarfinfo(filename):
    """Return the DWARFInfo object for the ELF stream *filename*.

    Despite its name, *filename* is passed straight to ELFFile and its
    ``.name`` attribute is read for the error message. Exits the process
    with status 1 when the ELF file has no DWARF info.
    """
    elffile = ELFFile(filename)

    if elffile.has_dwarf_info():
        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        return elffile.get_dwarf_info()

    print(filename.name + ': ELF file has no DWARF info!')
    sys.exit(1)
Ejemplo n.º 37
0
    def load_binary(self, file):
        """Load the ELF binary at *file* and set up program info and runner.

        Sets ``self.loaded_file`` (absolute path), ``self.program_info`` and
        ``self.runner``. Asserts that the path is a regular file and that
        the binary has DWARF info.
        """
        file = os.path.abspath(file)

        assert os.path.isfile(file)

        # ELF is a binary format, so the stream must be opened in binary
        # mode; in text mode it yields decoded text rather than raw bytes.
        with open(file, "rb") as binary_file:
            elffile = ELFFile(binary_file)
            assert elffile.has_dwarf_info()

            self.loaded_file = file
            self.program_info = ProgramInfo(elffile)
            self.runner = ProgramRunner(self, file)
Ejemplo n.º 38
0
def process_dwarf_info(in_file, out_file):
    '''
    Main function processing the DWARF information from the debug sections.

    Reads the ELF file ``in_file``, runs type/frame processing on its DWARF
    info, decodes every compile unit into a ``CFG_pb2.Module`` and writes
    the serialized module to ``out_file``. Global variables with a positive
    size are added to the module.

    Returns False when ``in_file`` has no DWARF info; otherwise returns
    None after writing the output.
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Report the input path (the original formatted the unrelated
            # name ``file`` here).
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

        # .values() works on both Python 2 and 3 (iteritems is 2-only).
        for value in GLOBAL_VARIABLES.values():
            if value["size"] > 0:
                gvar = M.global_vars.add()
                gvar.name = value["name"]
                gvar.ea = value["addr"]
                gvar.size = value["size"]
            else:
                DEBUG("Look for {}".format(pprint.pformat(value)))

        # SerializeToString() yields bytes, so the file is opened in
        # binary mode.
        with open(out_file, "wb") as outf:
            outf.write(M.SerializeToString())

    DEBUG("Global Vars\n")
    DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
    DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
    DEBUG("End Global Vars\n")
Ejemplo n.º 39
0
def process_dwarf_info(in_file, out_file):
  '''
  Main function processing the DWARF information from the debug sections.

  Reads the ELF file ``in_file``, runs type/frame processing on its DWARF
  info, decodes every compile unit into a ``CFG_pb2.Module`` and writes the
  serialized module to ``out_file``. Global variables with a positive size
  are added to the module.

  Returns False when ``in_file`` has no DWARF info; otherwise returns None
  after writing the output.
  '''
  DEBUG('Processing file: {0}'.format(in_file))

  with open(in_file, 'rb') as f:
    f_elf = ELFFile(f)
    if not f_elf.has_dwarf_info():
      # Report the input path (the original formatted the unrelated name
      # ``file`` here).
      DEBUG("{0} has no debug informations!".format(in_file))
      return False

    M = CFG_pb2.Module()
    M.name = "GlobalVariable"

    set_global_machine_arch(f_elf.get_machine_arch())
    dwarf_info = f_elf.get_dwarf_info()
    process_types(dwarf_info, TYPES_MAP)
    process_frames(dwarf_info, EH_FRAMES)
    section_offset = dwarf_info.debug_info_sec.global_offset

    # Iterate through all the compile units
    for CU in dwarf_info.iter_CUs():
      DEBUG('Found a compile unit at offset {0}, length {1}'.format(CU.cu_offset, CU['unit_length']))
      top_DIE = CU.get_top_DIE()
      c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset, section_offset)
      c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

    # .values() works on both Python 2 and 3 (iteritems is 2-only).
    for value in GLOBAL_VARIABLES.values():
      if value["size"] > 0:
        gvar = M.global_vars.add()
        gvar.name = value["name"]
        gvar.ea = value["addr"]
        gvar.size = value["size"]
      else:
        DEBUG("Look for {}".format(pprint.pformat(value)))

    # SerializeToString() yields bytes, so the file is opened in binary mode.
    with open(out_file, "wb") as outf:
      outf.write(M.SerializeToString())

  DEBUG("Global Vars\n")
  DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
  DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
  DEBUG("End Global Vars\n")
Ejemplo n.º 40
0
    def test_DWARF_indirect_forms(self):
        """Sanity check: the DW_FORM_indirect fixture parses into 9 CUs."""
        # The fixture is an ARM ELF with non-trivial DWARF info that makes
        # heavy use of DW_FORM_indirect; we only verify that it parses and
        # yields the expected number of compile units.
        fixture = os.path.join('test', 'testfiles_for_unittests',
                               'arm_with_form_indirect.elf')
        with open(fixture, 'rb') as stream:
            elffile = ELFFile(stream)
            self.assertTrue(elffile.has_dwarf_info())

            cu_count = sum(1 for _ in elffile.get_dwarf_info().iter_CUs())
            self.assertEqual(cu_count, 9)
Ejemplo n.º 41
0
def process_file(filename):
    """Print the ELF class of *filename* and the address size of each CU."""
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)
        # elfclass is a public attribute of ELFFile, read from its header
        print('%s: elfclass is %s' % (filename, elffile.elfclass))

        if not elffile.has_dwarf_info():
            return

        for CU in elffile.get_dwarf_info().iter_CUs():
            # cu_offset is a public attribute of CU, while address_size
            # comes from the CU header.
            offset, addr_size = CU.cu_offset, CU['address_size']
            print('  CU at offset 0x%x. address_size is %s' % (
                offset, addr_size))
Ejemplo n.º 42
0
def process_file(filename):
    """Print each compile unit's source name and dump its DIE tree.

    The name is ``DW_AT_comp_dir`` joined with ``DW_AT_name`` of the top
    DIE; when only one of the two is present that one is used alone. A
    KeyError is raised when both are missing.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item-lookup, computed attributes as plain attributes.
            print('  Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            # The top DIE is the root of this CU's DIE tree.
            top_DIE = CU.get_top_DIE()
            print('    Top DIE with tag=%s' % top_DIE.tag)

            # Attribute values are AttributeValue objects; string values
            # are resolved by the library and handed over directly.
            attrs = top_DIE.attributes
            if 'DW_AT_comp_dir' in attrs:
                comp_dir = bytes2str(attrs['DW_AT_comp_dir'].value)
                if 'DW_AT_name' in attrs:
                    name = os.path.join(
                        comp_dir, bytes2str(attrs['DW_AT_name'].value))
                else:
                    name = comp_dir
            else:
                # No compilation directory: fall back to the bare name.
                name = bytes2str(attrs['DW_AT_name'].value)
            print('    name=%s' % name)

            # Display DIEs recursively starting with top_DIE
            die_info_rec(top_DIE)
Ejemplo n.º 43
0
def generate_docs_from_stream(filename, fd):
	"""Build one ``mdoc`` per compilation unit of the ELF stream *fd*.

	Returns ``sequence(docs)``, or an empty list (after printing a notice
	that mentions *filename*) when the stream has no DWARF info.
	"""
	elffile = ELFFile(fd)

	if not elffile.has_dwarf_info():
		print('    ' + filename + ' has no DWARF info')
		return []

	# cu: compilation unit
	# DIE: debug information entry
	docs = []
	for cu in elffile.get_dwarf_info().iter_CUs():
		docs.append(mdoc((cu, cu.get_top_DIE())))
	return sequence(docs)
Ejemplo n.º 44
0
    def extract(self, binary):
        """Run ``__extract_DIE`` over every DIE of *binary*.

        Returns the ``protos`` dict that is passed to each call; it stays
        empty when the file has no DWARF info.
        """
        protos = {}
        with open(binary, 'rb') as stream:
            elf_file = ELFFile(stream)

            if elf_file.has_dwarf_info():
                for CU in elf_file.get_dwarf_info().iter_CUs():
                    for DIE in CU.iter_DIEs():
                        self.__extract_DIE(CU, DIE, protos)
            else:
                print('    File has no debug info (DWARF format expected) !')

        return protos
Ejemplo n.º 45
0
class SharedObjectInfo():
    """ELF file loaded at a base address, with lazily loaded DWARF info.

    Attributes set by the constructor:
        path       -- path of the ELF file
        elf_file   -- ELFFile parsed from ``path``
        low_addr   -- the base address passed in
        high_addr  -- base address plus the summed PT_LOAD segment sizes
        is_pic     -- True when the ELF header type is ``ET_DYN``
    """

    def __init__(self, path, baddr):
        self.path = path
        self._set_elf_file()

        self.low_addr = baddr
        self.high_addr = baddr + self._get_mem_size()

        # Check whether the ELF file is position independent code
        self.is_pic = self.elf_file.header['e_type'] == 'ET_DYN'

        # Don't set the so info's dwarf_info initially, only when
        # symbol lookup is first required
        self._dwarf_info = None

    @property
    def dwarf_info(self):
        """DWARFInfo of the file, loaded on first access."""
        if self._dwarf_info is None:
            self._set_dwarf_info()

        return self._dwarf_info

    def _set_elf_file(self):
        """Open ``self.path`` and parse it; exit with -1 on IOError."""
        try:
            # The stream is not closed here: self.elf_file is used again
            # later (segments, header, DWARF info).
            binary_file = open(self.path, 'rb')
            self.elf_file = ELFFile(binary_file)
        except IOError:
            print('Failed to open ' + self.path, file=sys.stderr)
            sys.exit(-1)

    def _set_dwarf_info(self):
        """Load the DWARF info; exit with -1 when the file has none."""
        if not self.elf_file.has_dwarf_info():
            print('Binary ' + self.path + ' has no DWARF info',
                  file=sys.stderr)
            sys.exit(-1)

        self._dwarf_info = self.elf_file.get_dwarf_info()

    def _get_mem_size(self):
        """Return the sum of PT_LOAD segment sizes, each rounded up to its
        alignment.
        """
        mem_size = 0
        for segment in self.elf_file.iter_segments():
            if segment['p_type'] != 'PT_LOAD':
                continue

            alignment = segment['p_align']
            segment_size = segment['p_memsz']
            # p_align values 0 and 1 mean "no alignment required"; dividing
            # by them would raise ZeroDivisionError or be a no-op.
            if alignment > 1:
                # Integer ceiling division: exact for any size, and the
                # same on Python 2 and 3.
                segment_size = -(-segment_size // alignment) * alignment
            mem_size += segment_size

        return mem_size
Ejemplo n.º 46
0
def process_file(filename):
    """Print every location list referenced by a DIE attribute in *filename*."""
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # LocationLists object extracted by DWARFInfo from the .debug_loc
        # section.
        location_lists = dwarfinfo.location_lists()

        # Needed so the descriptions module decodes register names in
        # DWARF expressions correctly.
        set_global_machine_arch(elffile.get_machine_arch())

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item-lookup.
            print('  Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object (from
                # elftools.dwarf.die).
                for attr in itervalues(DIE.attributes):
                    if not attribute_has_location_list(attr):
                        continue

                    # The attribute value is an offset into .debug_loc,
                    # decoded through the location lists object.
                    loclist = location_lists.get_location_list_at_offset(
                        attr.value)
                    description = show_loclist(
                        loclist, dwarfinfo, indent='      ')

                    print('   DIE %s. attr %s.\n%s' % (
                        DIE.tag, attr.name, description))
Ejemplo n.º 47
0
def process_file(filename, outfile):
    """Emit serializers for ``object_t`` of every CU in *filename*.

    Output is written to *outfile*. Raises IOError when *filename* has no
    DWARF info (before *outfile* is opened).
    """
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        has_dwarf = elffile.has_dwarf_info()
        if not has_dwarf:
            raise IOError("ERROR: {} has no DWARF info".format(filename))

        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        with open(outfile, 'w') as outFp:
            for CU in dwarfinfo.iter_CUs():
                # The result is not used below; the call is kept as in the
                # original in case it has side effects.
                dies = get_dies_by_offset(CU)
                wanted = ["object_t"]
                types = get_serialization_types(wanted, CU)

                emit.emit_serializers(outFp, types)
Ejemplo n.º 48
0
def process_file(filename):
    """Print every range list referenced by a DIE attribute in *filename*."""
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # RangeLists object extracted by DWARFInfo from the .debug_ranges
        # section; None means the section is absent.
        range_lists = dwarfinfo.range_lists()
        if range_lists is None:
            print('  file has no .debug_ranges section')
            return

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item-lookup.
            print('  Found a compile unit at offset {0!s}, length {1!s}'.format(
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object (from
                # elftools.dwarf.die).
                for attr in itervalues(DIE.attributes):
                    if not attribute_has_range_list(attr):
                        continue

                    # The attribute value is an offset into .debug_ranges,
                    # decoded through the range lists object.
                    rangelist = range_lists.get_range_list_at_offset(
                        attr.value)

                    print('   DIE {0!s}. attr {1!s}.\n{2!s}'.format(
                        DIE.tag, attr.name, rangelist))
Ejemplo n.º 49
0
def process_file(filename, address):
    """Print the function, source file and line that *address* maps to."""
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo context object is the starting point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        funcname = decode_funcname(dwarfinfo, address)
        src_file, line = decode_file_line(dwarfinfo, address)

        for label, value in (('Function:', bytes2str(funcname)),
                             ('File:', bytes2str(src_file)),
                             ('Line:', line)):
            print(label, value)
Ejemplo n.º 50
0
def test_files(fns,quiet=False,profile=False):
  for fn in fns:
    try:
      elf = ELFFile(open(fn))
    except ELFError:
      if not quiet:
        print "Skipping non-ELF file:",fn
      continue

    if not elf.has_dwarf_info():
      if not quiet:
        print "No dwarf info for {}.".format(fn)
      continue

    dwarfinfo = elf.get_dwarf_info()
    dwarf_functions = get_functions(dwarfinfo)

    engine_functions = {}
    for engine in ENGINES:
      this_engine = Static(fn, debug=0, static_engine=engine) #no debug output
      if args.profile:
        #needs pycallgraph
        from pycallgraph import PyCallGraph
        from pycallgraph.output import GraphvizOutput
        graphviz = GraphvizOutput()
        graphviz.output_file = 'prof.png'
        with PyCallGraph(output=graphviz):
          this_engine.process()
      else:
        this_engine.process()
      engine_functions[engine] = {x.start for x in this_engine['functions']}

    for engine,functions in engine_functions.iteritems():
      missed = dwarf_functions - functions
      total_fxns = len(dwarf_functions)
      short_fn = fn.split("/")[-1] if "/" in fn else fn
      if len(missed) == 0:
        print "{} {}: {} found all {} function(s).".format(ok_green, short_fn, engine, total_fxns)
      else:
        fmt = "{} {}: {} missed {}/{} functions: {}."
        print fmt.format(warn, short_fn, engine,
                len(missed), total_fxns, ", ".join(hex(fxn) for fxn in missed))
Ejemplo n.º 51
0
def get_files_from_executable(filename):
    """Return the full source path of every compile unit in *filename*.

    Returns an empty list when the file is not a valid ELF file or has no
    DWARF info (both cases are logged at INFO level).
    """
    with open(filename, 'rb') as f:
        # ELFFile looks for magic number, if there's none, ELFError is raised
        try:
            elffile = ELFFile(f)
        except ELFError:
            logging.info("%s is invalid elf file", filename)
            return []

        if not elffile.has_dwarf_info():
            logging.info("File does not have dwarf info, no sources in the project file")
            # Same "nothing found" value as the invalid-ELF path, so
            # callers can always iterate the result.
            return []
        dwarfinfo = elffile.get_dwarf_info()

        # Go over all the compile units in the DWARF information and get
        # the source file path from each top DIE.
        return [CU.get_top_DIE().get_full_path()
                for CU in dwarfinfo.iter_CUs()]
Ejemplo n.º 52
0
def get_executable_src_files(exec_path):
    """Return the source file paths recorded in the DWARF info of *exec_path*.

    *exec_path* must be an absolute path (asserted). One path is returned
    per compile unit, taken from the top DIE's ``DW_AT_name``; relative
    names are joined with ``DW_AT_comp_dir`` and passed through
    ``os.path.realpath``. Returns an empty list (after printing a notice)
    when the file is not an ELF file or has no DWARF information.
    """
    assert (os.path.isabs(exec_path))

    exec_src_paths = []
    with open(exec_path, 'rb') as elf_file_handle:
        try:
            elf_file = ELFFile(elf_file_handle)
        except Exception:
            # Any parse failure is treated as "not an ELF file", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print ('-- Executable \'' + exec_path + '\' is not an ELF file')
            return []

        if not elf_file.has_dwarf_info():
            print ('-- Executable \'' + exec_path + '\' has no DWARF information')
            return []

        dwarf_info = elf_file.get_dwarf_info()
        for CU in dwarf_info.iter_CUs():
            DIE = CU.get_top_DIE()

            name = ''
            comp_dir = ''
            for attr in itervalues(DIE.attributes):
                if attr.name == 'DW_AT_name':
                    name = attr.value
                if attr.name == 'DW_AT_comp_dir':
                    comp_dir = attr.value

            # If the source path in the executable is not an absolute
            # path then use the DW_AT_comp_dir attribute to get the
            # build directory to make it absolute, then use realpath to
            # resolve any symbolic links.
            src_path = name
            if not os.path.isabs(name):
                assert (comp_dir != '')
                src_path = os.path.join(comp_dir, name)
                assert(os.path.isabs(src_path))
                src_path = os.path.realpath(src_path)
            # NOTE(review): .decode() assumes the attribute values are
            # bytes -- confirm against the pyelftools version in use.
            exec_src_paths.append(src_path.decode())
    return exec_src_paths
Ejemplo n.º 53
0
class ReadElf(object):
    """ display_* methods are used to emit output into the output stream
    """
    def __init__(self, file, output):
        """ Wrap an ELF stream for readelf-style dumping.

            file:
                stream object holding the ELF file; it is handed to ELFFile

            output:
                output stream the display_* methods write to
        """
        self.elffile = ELFFile(file)
        self.output = output

        # Both caches start out empty and are only filled on demand:
        # _dwarfinfo by _init_dwarfinfo() when a debug dump is requested,
        # _versioninfo by _init_versioninfo().
        self._dwarfinfo = self._versioninfo = None

    def display_file_header(self):
        """ Emit the ELF file header: the raw e_ident bytes as the "Magic"
            line, followed by one labelled line per header field.
        """
        emit = self._emit
        emitline = self._emitline
        as_hex = self._format_hex

        emitline('ELF Header:')
        emit('  Magic:   ')
        magic = ['%2.2x' % byte2int(raw) for raw in self.elffile.e_ident_raw]
        emitline(' '.join(magic))

        hdr = self.elffile.header
        ident = hdr['e_ident']

        # Identification fields
        emitline('  Class:                             %s' %
                 describe_ei_class(ident['EI_CLASS']))
        emitline('  Data:                              %s' %
                 describe_ei_data(ident['EI_DATA']))
        emitline('  Version:                           %s' %
                 describe_ei_version(ident['EI_VERSION']))
        emitline('  OS/ABI:                            %s' %
                 describe_ei_osabi(ident['EI_OSABI']))
        emitline('  ABI Version:                       %d' %
                 ident['EI_ABIVERSION'])

        # Fields of the header proper
        emitline('  Type:                              %s' %
                 describe_e_type(hdr['e_type']))
        emitline('  Machine:                           %s' %
                 describe_e_machine(hdr['e_machine']))
        emitline('  Version:                           %s' %
                 describe_e_version_numeric(hdr['e_version']))
        emitline('  Entry point address:               %s' %
                 as_hex(hdr['e_entry']))

        # The two table offsets are printed as plain numbers, each followed
        # by a fixed suffix emitted separately.
        emit('  Start of program headers:          %s' % hdr['e_phoff'])
        emitline(' (bytes into file)')
        emit('  Start of section headers:          %s' % hdr['e_shoff'])
        emitline(' (bytes into file)')

        flags = hdr['e_flags']
        emitline('  Flags:                             %s%s' %
                 (as_hex(flags), self.decode_flags(flags)))

        # Sizes and counts
        emitline('  Size of this header:               %s (bytes)' %
                 hdr['e_ehsize'])
        emitline('  Size of program headers:           %s (bytes)' %
                 hdr['e_phentsize'])
        emitline('  Number of program headers:         %s' %
                 hdr['e_phnum'])
        emitline('  Size of section headers:           %s (bytes)' %
                 hdr['e_shentsize'])
        emitline('  Number of section headers:         %s' %
                 hdr['e_shnum'])
        emitline('  Section header string table index: %s' %
                 hdr['e_shstrndx'])

    def decode_flags(self, flags):
        description = ""
        if self.elffile['e_machine'] == "EM_ARM":
            if flags & E_FLAGS.EF_ARM_HASENTRY:
                description += ", has entry point"

            version = flags & E_FLAGS.EF_ARM_EABIMASK
            if version == E_FLAGS.EF_ARM_EABI_VER5:
                description += ", Version5 EABI"
        return description

    def display_program_headers(self, show_heading=True):
        """ Emit the program header table, followed by the section to
            segment mapping when the file has sections.

            show_heading:
                if True, the table is preceded by the file type, the entry
                point and the number/offset of the program headers
        """
        elf = self.elffile
        emit = self._emit
        emitline = self._emitline
        as_hex = self._format_hex

        emitline()
        if elf.num_segments() == 0:
            emitline('There are no program headers in this file.')
            return

        hdr = elf.header
        if show_heading:
            emitline('Elf file type is %s' %
                describe_e_type(hdr['e_type']))
            emitline('Entry point is %s' %
                as_hex(hdr['e_entry']))
            # readelf weirdness: e_phoff is not printed as hex here, although
            # the corresponding line for section headers does use hex.
            emitline('There are %s program headers, starting at offset %s' % (
                hdr['e_phnum'], hdr['e_phoff']))
            emitline()

        emitline('Program Headers:')

        # 32-bit and 64-bit addresses need different layouts: the 32-bit
        # table fits on one line per segment, the 64-bit one takes two.
        is_32bit = elf.elfclass == 32

        # Table heading
        if is_32bit:
            emitline('  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align')
        else:
            emitline('  Type           Offset             VirtAddr           PhysAddr')
            emitline('                 FileSiz            MemSiz              Flags  Align')

        # Table entries
        for seg in elf.iter_segments():
            emit('  %-14s ' % describe_p_type(seg['p_type']))

            if is_32bit:
                columns = (
                    as_hex(seg['p_offset'], fieldsize=6),
                    as_hex(seg['p_vaddr'], fullhex=True),
                    as_hex(seg['p_paddr'], fullhex=True),
                    as_hex(seg['p_filesz'], fieldsize=5),
                    as_hex(seg['p_memsz'], fieldsize=5),
                    describe_p_flags(seg['p_flags']),
                    as_hex(seg['p_align']))
                emitline('%s %s %s %s %s %-3s %s' % columns)
            else:
                first_row = (
                    as_hex(seg['p_offset'], fullhex=True),
                    as_hex(seg['p_vaddr'], fullhex=True),
                    as_hex(seg['p_paddr'], fullhex=True))
                emitline('%s %s %s' % first_row)
                second_row = (
                    as_hex(seg['p_filesz'], fullhex=True),
                    as_hex(seg['p_memsz'], fullhex=True),
                    describe_p_flags(seg['p_flags']),
                    # p_align is printed without the leading 0x in 64-bit
                    # mode only, to mimic readelf.
                    as_hex(seg['p_align'], lead0x=False))
                emitline('                 %s %s  %-3s    %s' % second_row)

            if isinstance(seg, InterpSegment):
                emitline('      [Requesting program interpreter: %s]' %
                    bytes2str(seg.get_interp_name()))

        # Without sections there is no mapping to print
        if elf.num_sections() == 0:
            return

        emitline('\n Section to Segment mapping:')
        emitline('  Segment Sections...')

        for seg_index, seg in enumerate(elf.iter_segments()):
            emit('   %2.2d     ' % seg_index)

            for sec in elf.iter_sections():
                if sec.is_null() or not seg.section_in_segment(sec):
                    continue
                emit('%s ' % bytes2str(sec.name))

            emitline('')

    def display_section_headers(self, show_heading=True):
        """ Emit the section header table followed by the flag legend.

            show_heading:
                if True, first emit the number of section headers and the
                offset they start at
        """
        elf = self.elffile
        emit = self._emit
        emitline = self._emitline
        as_hex = self._format_hex

        hdr = elf.header
        if show_heading:
            emitline('There are %s section headers, starting at offset %s' % (
                hdr['e_shnum'], as_hex(hdr['e_shoff'])))

        plural = 's' if hdr['e_shnum'] > 1 else ''
        emitline('\nSection Header%s:' % (plural))

        # 32-bit and 64-bit addresses need different layouts: one line per
        # section for 32-bit files, two lines for 64-bit ones.
        is_32bit = elf.elfclass == 32
        if is_32bit:
            emitline('  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al')
        else:
            emitline('  [Nr] Name              Type             Address           Offset')
            emitline('       Size              EntSize          Flags  Link  Info  Align')

        # Table entries
        for index, sec in enumerate(elf.iter_sections()):
            emit('  [%2u] %-17.17s %-15.15s ' % (
                index, bytes2str(sec.name), describe_sh_type(sec['sh_type'])))

            if is_32bit:
                columns = (
                    as_hex(sec['sh_addr'], fieldsize=8, lead0x=False),
                    as_hex(sec['sh_offset'], fieldsize=6, lead0x=False),
                    as_hex(sec['sh_size'], fieldsize=6, lead0x=False),
                    as_hex(sec['sh_entsize'], fieldsize=2, lead0x=False),
                    describe_sh_flags(sec['sh_flags']),
                    sec['sh_link'], sec['sh_info'],
                    sec['sh_addralign'])
                emitline('%s %s %s %s %3s %2s %3s %2s' % columns)
            else:
                address = as_hex(sec['sh_addr'], fullhex=True, lead0x=False)
                # The offset column widens from 8 to 16 digits only when
                # the value does not fit in 32 bits.
                offset_width = 16 if sec['sh_offset'] > 0xffffffff else 8
                offset = as_hex(sec['sh_offset'], fieldsize=offset_width,
                                lead0x=False)
                emitline(' %s  %s' % (address, offset))

                second_row = (
                    as_hex(sec['sh_size'], fullhex=True, lead0x=False),
                    as_hex(sec['sh_entsize'], fullhex=True, lead0x=False),
                    describe_sh_flags(sec['sh_flags']),
                    sec['sh_link'], sec['sh_info'],
                    sec['sh_addralign'])
                emitline('       %s  %s %3s      %2s   %3s     %s' % second_row)

        # Legend; the "large" flag is only listed for two machine types
        emitline('Key to Flags:')
        emit('  W (write), A (alloc), X (execute), M (merge), S (strings)')
        if elf['e_machine'] in ('EM_X86_64', 'EM_L10M'):
            emitline(', l (large)')
        else:
            emitline()
        emitline('  I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)')
        emitline('  O (extra OS processing required) o (OS specific), p (processor specific)')

    def display_symbol_tables(self):
        """ Display the symbol tables contained in the file.

            Every SymbolTableSection is listed, one line per symbol. For
            SHT_DYNSYM sections of files using GNU versioning, the symbol
            name is followed by its version when one can be determined.
        """
        self._init_versioninfo()

        for section in self.elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            # A zero entry size makes the table impossible to walk; report
            # it and move on to the next section.
            if section['sh_entsize'] == 0:
                self._emitline("\nSymbol table '%s' has a sh_entsize of zero!" % (
                    bytes2str(section.name)))
                continue

            self._emitline("\nSymbol table '%s' contains %s entries:" % (
                bytes2str(section.name), section.num_symbols()))

            if self.elffile.elfclass == 32:
                self._emitline('   Num:    Value  Size Type    Bind   Vis      Ndx Name')
            else: # 64
                self._emitline('   Num:    Value          Size Type    Bind   Vis      Ndx Name')

            for nsym, symbol in enumerate(section.iter_symbols()):

                version_info = ''
                # readelf doesn't display version info for Solaris versioning
                if (section['sh_type'] == 'SHT_DYNSYM' and
                        self._versioninfo['type'] == 'GNU'):
                    # _symbol_version() returns None when there is no versym
                    # section or nsym lies beyond it; in that case the
                    # symbol is simply printed without a version suffix.
                    version = self._symbol_version(nsym)
                    if (version is not None and
                        version['name'] != bytes2str(symbol.name) and
                        version['index'] not in ('VER_NDX_LOCAL',
                                                 'VER_NDX_GLOBAL')):
                        if version['filename']:
                            # external symbol
                            version_info = '@%(name)s (%(index)i)' % version
                        else:
                            # internal symbol
                            if version['hidden']:
                                version_info = '@%(name)s' % version
                            else:
                                version_info = '@@%(name)s' % version

                # symbol names are truncated to 25 chars, similarly to readelf
                self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s%s' % (
                    nsym,
                    self._format_hex(
                        symbol['st_value'], fullhex=True, lead0x=False),
                    symbol['st_size'],
                    describe_symbol_type(symbol['st_info']['type']),
                    describe_symbol_bind(symbol['st_info']['bind']),
                    describe_symbol_visibility(symbol['st_other']['visibility']),
                    describe_symbol_shndx(symbol['st_shndx']),
                    bytes2str(symbol.name),
                    version_info))

    def display_dynamic_tags(self):
        """ Emit one table per dynamic section, with a line per tag giving
            its numeric value, its name and a decoded form of its value.
        """
        # Tags whose value is a string exposed as an attribute of the tag
        # object: d_tag -> (output format, attribute name)
        string_tags = {
            'DT_NEEDED': ('Shared library: [%s]', 'needed'),
            'DT_RPATH': ('Library rpath: [%s]', 'rpath'),
            'DT_RUNPATH': ('Library runpath: [%s]', 'runpath'),
            'DT_SONAME': ('Library soname: [%s]', 'soname'),
        }

        seen_dynamic = False
        for section in self.elffile.iter_sections():
            if not isinstance(section, DynamicSection):
                continue

            seen_dynamic = True
            self._emitline("\nDynamic section at offset %s contains %s entries:" % (
                self._format_hex(section['sh_offset']),
                section.num_tags()))
            self._emitline("  Tag        Type                         Name/Value")

            # The name column is 8 characters wider for 32-bit files
            if self.elffile.elfclass == 32:
                padding = 20 + 8
            else:
                padding = 20 + 0

            for tag in section.iter_tags():
                d_tag = tag.entry.d_tag

                if d_tag in string_tags:
                    template, attr_name = string_tags[d_tag]
                    parsed = template % bytes2str(getattr(tag, attr_name))
                elif d_tag.endswith(('SZ', 'ENT')):
                    parsed = '%i (bytes)' % tag['d_val']
                elif d_tag.endswith(('NUM', 'COUNT')):
                    parsed = '%i' % tag['d_val']
                elif d_tag == 'DT_PLTREL':
                    # The value is itself a tag; show its name without the
                    # DT_ prefix.
                    s = describe_dyn_tag(tag.entry.d_val)
                    if s.startswith('DT_'):
                        s = s[3:]
                    parsed = '%s' % s
                else:
                    parsed = '%#x' % tag['d_val']

                tag_number = ENUM_D_TAG.get(d_tag, d_tag)
                self._emitline(" %s %-*s %s" % (
                    self._format_hex(tag_number, fullhex=True, lead0x=True),
                    padding,
                    '(%s)' % (d_tag[3:],),
                    parsed))

        # readelf only prints this if there is at least one segment
        if not seen_dynamic and self.elffile.num_segments():
            self._emitline("\nThere is no dynamic section in this file.")

    def display_relocations(self):
        """ Emit one table per relocation section, with a line for each
            relocation entry; a notice is emitted instead when the file
            has no relocation sections at all.
        """
        elf = self.elffile
        emit = self._emit
        emitline = self._emitline
        as_hex = self._format_hex

        # Column widths depend on the ELF class only
        is_32bit = elf.elfclass == 32
        hexwidth = 8 if is_32bit else 12
        name_pad = '  ' if is_32bit else ''

        seen_relocs = False
        for section in elf.iter_sections():
            if not isinstance(section, RelocationSection):
                continue

            seen_relocs = True
            emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
                bytes2str(section.name),
                as_hex(section['sh_offset']),
                section.num_relocations()))
            if section.is_RELA():
                emitline("  Offset          Info           Type           Sym. Value    Sym. Name + Addend")
            else:
                emitline(" Offset     Info    Type            Sym.Value  Sym. Name")

            # sh_link designates the symbol table the entries refer to
            symtable = elf.get_section(section['sh_link'])

            for rel in section.iter_relocations():
                offset_col = as_hex(rel['r_offset'],
                                    fieldsize=hexwidth, lead0x=False)
                info_col = as_hex(rel['r_info'],
                                  fieldsize=hexwidth, lead0x=False)
                type_col = describe_reloc_type(rel['r_info_type'], elf)
                emit('%s  %s %-17.17s' % (offset_col, info_col, type_col))

                # Symbol index 0: nothing more to show on this line
                if rel['r_info_sym'] == 0:
                    emitline()
                    continue

                symbol = symtable.get_symbol(rel['r_info_sym'])
                # A symbol whose 'st_name' is zero is shown under the name
                # of the section it points at instead.
                if symbol['st_name'] != 0:
                    symbol_name = symbol.name
                else:
                    symbol_name = elf.get_section(symbol['st_shndx']).name

                value_col = as_hex(symbol['st_value'],
                                   fullhex=True, lead0x=False)
                emit(' %s %s%22.22s' % (
                    value_col, name_pad, bytes2str(symbol_name)))

                if section.is_RELA():
                    sign = '+' if rel['r_addend'] >= 0 else '-'
                    emit(' %s %x' % (sign, abs(rel['r_addend'])))
                emitline()

        if not seen_relocs:
            emitline('\nThere are no relocations in this file.')

    def display_version_info(self):
        """ Display the version info contained in the file.

            Emits a notice and returns when no versioning type was detected
            by _init_versioninfo(). Otherwise walks all sections and prints
            the version symbol table (GNUVerSymSection), the version
            definitions (GNUVerDefSection) and the version needs
            (GNUVerNeedSection), each preceded by its section header line.
        """
        self._init_versioninfo()

        if not self._versioninfo['type']:
            self._emitline("\nNo version information found in this file.")
            return

        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._print_version_section_header(
                    section, 'Version symbols', lead0x=False)

                num_symbols = section.num_symbols()

                # Symbol version entries are printed four per line, each
                # line starting with the index of its first entry in hex.
                for idx_by_4 in range(0, num_symbols, 4):

                    self._emit('  %03x:' % idx_by_4)

                    # min() keeps the last, possibly shorter, row in range
                    for idx in range(idx_by_4, min(idx_by_4 + 4, num_symbols)):

                        symbol_version = self._symbol_version(idx)
                        # The two reserved indices are shown as 0 and 1
                        # with fixed placeholder names.
                        if symbol_version['index'] == 'VER_NDX_LOCAL':
                            version_index = 0
                            version_name = '(*local*)'
                        elif symbol_version['index'] == 'VER_NDX_GLOBAL':
                            version_index = 1
                            version_name = '(*global*)'
                        else:
                            version_index = symbol_version['index']
                            version_name = '(%(name)s)' % symbol_version

                        # 'h' marks a hidden symbol version
                        visibility = 'h' if symbol_version['hidden'] else ' '

                        self._emit('%4x%s%-13s' % (
                            version_index, visibility, version_name))

                    self._emitline()

            elif isinstance(section, GNUVerDefSection):
                self._print_version_section_header(
                    section, 'Version definition', indent=2)

                # Running offset of the current definition, accumulated
                # from each entry's vd_next.
                offset = 0
                for verdef, verdaux_iter in section.iter_versions():
                    # The first auxiliary entry supplies the name printed
                    # for the definition; the iterator is left positioned
                    # on the remaining entries, printed below as parents.
                    verdaux = next(verdaux_iter)

                    name = verdaux.name
                    if verdef['vd_flags']:
                        flags = describe_ver_flags(verdef['vd_flags'])
                        # Mimic exactly the readelf output
                        flags += ' '
                    else:
                        flags = 'none'

                    self._emitline('  %s: Rev: %i  Flags: %s  Index: %i'
                                   '  Cnt: %i  Name: %s' % (
                            self._format_hex(offset, fieldsize=6,
                                             alternate=True),
                            verdef['vd_version'], flags, verdef['vd_ndx'],
                            verdef['vd_cnt'], bytes2str(name)))

                    # Offset printed for the first parent entry: definition
                    # offset + vd_aux + the first entry's vda_next; then
                    # advanced by each parent's own vda_next.
                    verdaux_offset = (
                            offset + verdef['vd_aux'] + verdaux['vda_next'])
                    for idx, verdaux in enumerate(verdaux_iter, start=1):
                        self._emitline('  %s: Parent %i: %s' %
                            (self._format_hex(verdaux_offset, fieldsize=4),
                                              idx, bytes2str(verdaux.name)))
                        verdaux_offset += verdaux['vda_next']

                    offset += verdef['vd_next']

            elif isinstance(section, GNUVerNeedSection):
                self._print_version_section_header(section, 'Version needs')

                # Running offset of the current entry, accumulated from
                # each entry's vn_next.
                offset = 0
                for verneed, verneed_iter in section.iter_versions():

                    self._emitline('  %s: Version: %i  File: %s  Cnt: %i' % (
                            self._format_hex(offset, fieldsize=6,
                                             alternate=True),
                            verneed['vn_version'], bytes2str(verneed.name),
                            verneed['vn_cnt']))

                    # Auxiliary entries start vn_aux past the entry itself
                    # and are chained through vna_next.
                    vernaux_offset = offset + verneed['vn_aux']
                    for idx, vernaux in enumerate(verneed_iter, start=1):
                        if vernaux['vna_flags']:
                            flags = describe_ver_flags(vernaux['vna_flags'])
                            # Mimic exactly the readelf output
                            flags += ' '
                        else:
                            flags = 'none'

                        self._emitline(
                            '  %s:   Name: %s  Flags: %s  Version: %i' % (
                                self._format_hex(vernaux_offset, fieldsize=4),
                                bytes2str(vernaux.name), flags,
                                vernaux['vna_other']))

                        vernaux_offset += vernaux['vna_next']

                    offset += verneed['vn_next']

    def display_hex_dump(self, section_spec):
        """ Emit a hex dump of one section, 16 bytes per line: the address,
            the bytes in groups of four, then their printable form.

            section_spec:
                section number or section name
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        emit = self._emit

        self._emitline("\nHex dump of section '%s':" % bytes2str(section.name))
        self._note_relocs_for_section(section)
        base_addr = section['sh_addr']
        data = section.data()
        total = len(data)

        for start in range(0, total, 16):
            # The last line may hold fewer than 16 bytes
            count = min(16, total - start)

            emit('  %s ' % self._format_hex(base_addr + start, fieldsize=8))

            # Hex columns; missing bytes are padded with blanks so that the
            # text column stays aligned, and a gap follows every 4th byte.
            for col in range(16):
                if col < count:
                    emit('%2.2x' % byte2int(data[start + col]))
                else:
                    emit('  ')
                if col % 4 == 3:
                    emit(' ')

            # Text column; bytes outside 32..0x7e are shown as '.'
            for col in range(count):
                char = data[start + col : start + col + 1]
                code = byte2int(char[0])
                if 32 <= code < 0x7f:
                    emit(bytes2str(char))
                else:
                    emit(bytes2str(b'.'))

            self._emitline()

        self._emitline()

    def display_string_dump(self, section_spec):
        """ Emit the strings found in one section, each prefixed with its
            offset within the section.

            section_spec:
                section number or section name
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nString dump of section '%s':" % bytes2str(section.name))

        data = section.data()
        size = len(data)
        printed_any = False
        pos = 0

        while pos < size:
            # Skip ahead to the next byte in the range 32..127
            while pos < size:
                code = byte2int(data[pos])
                if 32 <= code <= 127:
                    break
                pos += 1

            if pos >= size:
                break

            # The string extends up to the next zero byte (or end of data)
            stop = pos
            while stop < size and byte2int(data[stop]) != 0:
                stop += 1

            printed_any = True
            self._emitline('  [%6x]  %s' % (
                pos, bytes2str(data[pos:stop])))

            pos = stop

        if printed_any:
            self._emitline()
        else:
            self._emitline('  No strings found in this section.')

    def display_debug_dump(self, dump_what):
        """ Dump a DWARF section
        """
        self._init_dwarfinfo()
        if self._dwarfinfo is None:
            return

        set_global_machine_arch(self.elffile.get_machine_arch())

        if dump_what == 'info':
            self._dump_debug_info()
        elif dump_what == 'decodedline':
            self._dump_debug_line_programs()
        elif dump_what == 'frames':
            self._dump_debug_frames()
        elif dump_what == 'frames-interp':
            self._dump_debug_frames_interp()
        else:
            self._emitline('debug dump not yet supported for "%s"' % dump_what)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True,
                    alternate=False):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into. For example with fieldsize=8, the format will
                be %08x
                If None, the minimal required field size will be used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass

            lead0x:
                If True, leading 0x is added

            alternate:
                If True, override lead0x to emulate the alternate
                hexadecimal form specified in format string with the #
                character: only non-zero values are prefixed with 0x.
                This form is used by readelf.
        """
        if alternate:
            if addr == 0:
                lead0x = False
            else:
                lead0x = True
                fieldsize -= 2

        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _print_version_section_header(self, version_section, name, lead0x=True,
                                      indent=1):
        """ Emit the two header lines of a version related section (versym,
            verneed or verdef).

            name:
                label printed in front of the section name

            lead0x:
                whether the section address is prefixed with 0x

            indent:
                number of spaces in front of the "Addr:" line

            The last two options accommodate the small differences between
            the headers readelf prints for each kind of section.
        """
        # Sections offering num_versions() are counted in versions, the
        # others in symbols.
        if hasattr(version_section, 'num_versions'):
            count_entries = version_section.num_versions
        else:
            count_entries = version_section.num_symbols
        num_entries = count_entries()

        self._emitline("\n%s section '%s' contains %s entries:" %
            (name, bytes2str(version_section.name), num_entries))

        margin = ' ' * indent
        address = self._format_hex(
            version_section['sh_addr'], fieldsize=16, lead0x=lead0x)
        offset = self._format_hex(
            version_section['sh_offset'], fieldsize=6, lead0x=True)
        link = version_section['sh_link']
        # The linked section is shown by name next to its index
        linked_section = self.elffile.get_section(version_section['sh_link'])
        linked_name = bytes2str(linked_section.name)

        self._emitline('%sAddr: %s  Offset: %s  Link: %i (%s)' % (
            margin, address, offset, link, linked_name))

    def _init_versioninfo(self):
        """ Search and initialize informations about version related sections
            and the kind of versioning used (GNU or Solaris).
        """
        if self._versioninfo is not None:
            return

        self._versioninfo = {'versym': None, 'verdef': None,
                             'verneed': None, 'type': None}

        for section in self.elffile.iter_sections():
            if isinstance(section, GNUVerSymSection):
                self._versioninfo['versym'] = section
            elif isinstance(section, GNUVerDefSection):
                self._versioninfo['verdef'] = section
            elif isinstance(section, GNUVerNeedSection):
                self._versioninfo['verneed'] = section
            elif isinstance(section, DynamicSection):
                for tag in section.iter_tags():
                    if tag['d_tag'] == 'DT_VERSYM':
                        self._versioninfo['type'] = 'GNU'
                        break

        if not self._versioninfo['type'] and (
                self._versioninfo['verneed'] or self._versioninfo['verdef']):
            self._versioninfo['type'] = 'Solaris'

    def _symbol_version(self, nsym):
        """ Return a dict containing information on the
                   or None if no version information is available
        """
        self._init_versioninfo()

        symbol_version = dict.fromkeys(('index', 'name', 'filename', 'hidden'))

        if (not self._versioninfo['versym'] or
                nsym >= self._versioninfo['versym'].num_symbols()):
            return None

        symbol = self._versioninfo['versym'].get_symbol(nsym)
        index = symbol.entry['ndx']
        if not index in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'):
            index = int(index)

            if self._versioninfo['type'] == 'GNU':
                # In GNU versioning mode, the highest bit is used to
                # store wether the symbol is hidden or not
                if index & 0x8000:
                    index &= ~0x8000
                    symbol_version['hidden'] = True

            if (self._versioninfo['verdef'] and
                    index <= self._versioninfo['verdef'].num_versions()):
                _, verdaux_iter = \
                        self._versioninfo['verdef'].get_version(index)
                symbol_version['name'] = bytes2str(next(verdaux_iter).name)
            else:
                verneed, vernaux = \
                        self._versioninfo['verneed'].get_version(index)
                symbol_version['name'] = bytes2str(vernaux.name)
                symbol_version['filename'] = bytes2str(verneed.name)

        symbol_version['index'] = index
        return symbol_version

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
            if num < self.elffile.num_sections():
                return self.elffile.get_section(num)
            else:
                return None
        except ValueError:
            # Not a number. Must be a name then
            return self.elffile.get_section_by_name(str2bytes(spec))

    def _note_relocs_for_section(self, section):
        """ Emit a single note line if any relocation section in the file
            refers (through its sh_info field) to the given section.
            Nothing is emitted when no such relocation section is found.
        """
        for candidate in self.elffile.iter_sections():
            if not isinstance(candidate, RelocationSection):
                continue
            target = self.elffile.get_section(candidate['sh_info'])
            if target == section:
                self._emitline('  Note: This section has relocations against it, but these have NOT been applied to this dump.')
                # One note is enough, however many relocation sections match
                return

    def _init_dwarfinfo(self):
        """ Initialize the DWARF info contained in the file and assign it to
            self._dwarfinfo.
            Leave self._dwarfinfo at None if no DWARF info was found in the file
        """
        if self._dwarfinfo is not None:
            return

        if self.elffile.has_dwarf_info():
            self._dwarfinfo = self.elffile.get_dwarf_info()
        else:
            self._dwarfinfo = None

    def _dump_debug_info(self):
        """ Dump the .debug_info section: for every compilation unit, its
            header fields followed by each DIE and the DIE's attributes.
        """
        emitline = self._emitline
        to_hex = self._format_hex

        emitline('Contents of the .debug_info section:\n')

        # Offset of the .debug_info section in the stream
        section_offset = self._dwarfinfo.debug_info_sec.global_offset

        for cu in self._dwarfinfo.iter_CUs():
            emitline('  Compilation Unit @ offset %s:' % to_hex(cu.cu_offset))
            length_text = to_hex(cu['unit_length'])
            format_text = '%s-bit' % cu.dwarf_format()
            emitline('   Length:        %s (%s)' % (length_text, format_text))
            emitline('   Version:       %s' % cu['version'])
            emitline('   Abbrev Offset: %s' % to_hex(cu['debug_abbrev_offset']))
            emitline('   Pointer Size:  %s' % cu['address_size'])

            # Each DIE is printed with its nesting depth. The DIE tree is
            # serialized depth-first, so a running counter is enough: it goes
            # up after a DIE that has children and down at every null DIE
            # (which closes a list of siblings).
            depth = 0
            for die in cu.iter_DIEs():
                if die.is_null():
                    tag_suffix = ''
                else:
                    tag_suffix = ' (%s)' % die.tag
                emitline(' <%s><%x>: Abbrev Number: %s%s' % (
                    depth, die.offset, die.abbrev_code, tag_suffix))

                if die.is_null():
                    depth -= 1
                    continue

                for attr in itervalues(die.attributes):
                    label = attr.name
                    # Unknown attribute values are passed-through as integers
                    if isinstance(label, int):
                        label = 'Unknown AT value: %x' % label
                    described = describe_attr_value(attr, die, section_offset)
                    emitline('    <%2x>   %-18s: %s' % (
                        attr.offset, label, described))

                if die.has_children:
                    depth += 1

        emitline()

    def _dump_debug_line_programs(self):
        """ Dump the decoded line programs found in .debug_line, one per
            compilation unit, in the order the CUs are iterated.
        """
        self._emitline('Decoded dump of debug contents of section .debug_line:\n')

        for cu in self._dwarfinfo.iter_CUs():
            lineprogram = self._dwarfinfo.line_program_for_CU(cu)

            # The CU is labelled with its first file entry, prefixed by that
            # entry's include directory when the program lists any.
            first_file = lineprogram['file_entry'][0]
            cu_filename = bytes2str(first_file.name)
            include_dirs = lineprogram['include_directory']
            if len(include_dirs) > 0:
                first_dir_index = first_file.dir_index
                if first_dir_index > 0:
                    dirname = include_dirs[first_dir_index - 1]
                else:
                    dirname = b'.'
                cu_filename = '%s/%s' % (bytes2str(dirname), cu_filename)

            self._emitline('CU: %s:' % cu_filename)
            self._emitline('File name                            Line number    Starting address')

            # Print each state's file, line and address information. For some
            # instructions other output is needed to be compatible with
            # readelf.
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is not None:
                    # readelf doesn't print the state after end_sequence
                    # instructions, so neither does this dump.
                    if not state.end_sequence:
                        if state.address == 0:
                            address_text = '0'
                        else:
                            address_text = self._format_hex(state.address)
                        current = lineprogram['file_entry'][state.file - 1]
                        self._emitline('%-35s  %11d  %18s' % (
                            bytes2str(current.name),
                            state.line,
                            address_text))
                elif entry.command == DW_LNS_set_file:
                    # Commands that don't set a new state get special output
                    file_entry = lineprogram['file_entry'][entry.args[0] - 1]
                    if file_entry.dir_index == 0:
                        # current directory
                        self._emitline('\n./%s:[++]' % (
                            bytes2str(file_entry.name)))
                    else:
                        owner_dir = lineprogram['include_directory'][
                            file_entry.dir_index - 1]
                        self._emitline('\n%s/%s:' % (
                            bytes2str(owner_dir),
                            bytes2str(file_entry.name)))
                elif entry.command == DW_LNE_define_file:
                    defined_dir = lineprogram['include_directory'][
                        entry.args[0].dir_index]
                    self._emitline('%s:' % (bytes2str(defined_dir)))

                if entry.command == DW_LNS_copy:
                    # Another readelf oddity: a blank line after each copy
                    self._emitline()

    def _dump_debug_frames(self):
        """ Dump the raw (undecoded) call frame information from
            .debug_frame. Emits nothing when the file has no CFI.
        """
        if not self._dwarfinfo.has_CFI():
            return
        emitline = self._emitline
        emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if not isinstance(entry, CIE):
                # FDE: one heading line with the covered PC range
                start = entry['initial_location']
                emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    start,
                    start + entry['address_range']))
            else:
                emitline('\n%08x %08x %08x CIE' % (
                    entry.offset, entry['length'], entry['CIE_id']))
                emitline('  Version:               %d' % entry['version'])
                augmentation = bytes2str(entry['augmentation'])
                emitline('  Augmentation:          "%s"' % augmentation)
                emitline('  Code alignment factor: %u' % entry['code_alignment_factor'])
                emitline('  Data alignment factor: %d' % entry['data_alignment_factor'])
                emitline('  Return address column: %d' % entry['return_address_register'])
                emitline()

            # Both entry kinds are followed by their instruction listing
            self._emit(describe_CFI_instructions(entry))
        emitline()

    def _dump_debug_frames_interp(self):
        """ Dump the interpreted (decoded) call frame information from
            .debug_frame as one table per entry. Emits nothing when the file
            has no CFI.
        """
        if not self._dwarfinfo.has_CFI():
            return

        emit = self._emit
        emitline = self._emitline
        emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                ra_regnum = entry['return_address_register']
                emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_id'],
                    bytes2str(entry['augmentation']),
                    entry['code_alignment_factor'],
                    entry['data_alignment_factor'],
                    ra_regnum))
            else: # FDE
                start = entry['initial_location']
                emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    start,
                    start + entry['address_range']))
                ra_regnum = entry.cie['return_address_register']

            # Heading row of the decoded table; the LOC column is wider when
            # addresses are not 4 bytes long.
            emit('   LOC')
            if entry.structs.address_size == 4:
                emit('  ')
            else:
                emit('          ')
            emit(' CFA      ')

            # Decode the table and look at the registers it describes.
            # Columns follow readelf's order: registers sorted by number,
            # with the return-address register always last under its own
            # heading.
            decoded_table = entry.get_decoded()
            reg_order = sorted(
                reg for reg in decoded_table.reg_order if reg != ra_regnum)

            for regnum in reg_order:
                emit('%-6s' % describe_reg_name(regnum))
            emitline('ra      ')

            # From here on the return-address register is printed like any
            # other column.
            reg_order.append(ra_regnum)
            for row in decoded_table.table:
                emit(self._format_hex(
                    row['pc'], fullhex=True, lead0x=False))
                emit(' %-9s' % describe_CFI_CFA_rule(row['cfa']))

                for regnum in reg_order:
                    # 'u' marks a register with no rule in this row
                    cell = (describe_CFI_register_rule(row[regnum])
                            if regnum in row else 'u')
                    emit('%-6s' % cell)
                emitline()
        emitline()

    def _emit(self, s=''):
        """ Emit an object to output
        """
        self.output.write(str(s))

    def _emitline(self, s=''):
        """ Emit an object to output, followed by a newline
        """
        self.output.write(str(s) + '\n')
Ejemplo n.º 54
0
# Parse the command-line options. `parser` is not defined in this excerpt;
# the code below reads `args.elffile` as the path of the ELF file to open.
args = parser.parse_args()

def die(fmt, *args):
  """Write a 'struct2java.py: '-prefixed message to stderr and exit with status 1.

  fmt is a %-style format string and args are the values substituted into it.
  The prefix and a trailing newline are added before formatting.
  """
  template = 'struct2java.py: ' + fmt + '\n'
  sys.stderr.write(template % args)
  sys.exit(1)

def DIE_to_name(DIE):
  """Return the value of the DIE's DW_AT_name attribute decoded as ASCII.

  Returns None when the DIE has no DW_AT_name attribute.
  """
  attr = DIE.attributes.get('DW_AT_name')
  return None if attr is None else attr.value.decode('ascii')

# Open the ELF file named on the command line and index its typedef DIEs by
# name. The file must stay open while the DWARF info is being read.
with open(args.elffile, 'rb') as f:
  elffile = ELFFile(f)
  if not elffile.has_dwarf_info():
    die('file does not contain debug information')

  dwarfinfo = elffile.get_dwarf_info()

  # Lookup tables; only typedefs_by_name is filled in by the loop below.
  structs_by_offset = {}
  structs_by_name = {}
  typedefs_by_name = {}

  for CU in dwarfinfo.iter_CUs():
    for DIE in CU.iter_DIEs():
      if DIE.tag != 'DW_TAG_typedef':
        continue
      name = DIE_to_name(DIE)
      # Typedef DIEs without a name are not recorded
      if name is not None:
        typedefs_by_name[name] = DIE
      
Ejemplo n.º 55
0
class Image(object):
    """ELF image whose DWARF info is indexed for address/line lookups.

    When the file carries DWARF info, construction builds:
      _compile_units          CU name -> dict with the CU's line program,
                              PC range, comp_dir, functions, variables and
                              a line -> [addresses] table
      _addresses              address -> "<cu name>+<line>"
      _lowest_known_address   smallest CU low_pc seen (None if there is none)
      _cfa_rule               pc -> decoded CFI table row (only when the
                              file has CFI entries)
    """

    def __init__(self, fname):
        """Open the ELF file at path fname and index its DWARF info, if any."""
        # ELF is a binary format, so it must be read in binary mode: text
        # mode translates line endings (and stops at Ctrl-Z) on Windows.
        if platform.system() == "Windows":
            # The handle is given to ELFFile and deliberately left open.
            # NOTE(review): presumably ELFFile reads from the stream lazily,
            # so it cannot be closed here -- confirm.
            elf_data = open(fname, "rb")
        else:
            with open(fname, "rb") as f:
                elf_data = StringIO(f.read())

        # Start with empty lookup tables so that addr2line()/loc_at() give
        # their "unknown" answers for a file without DWARF info.
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None

        self.elf = ELFFile(elf_data)
        if self.elf.has_dwarf_info():
            self.dwarf = self.elf.get_dwarf_info()
            set_global_machine_arch(self.elf.get_machine_arch())
            self.__tame_dwarf()
            self.get_expr_evaluator = lambda: ExprLiveEval(self)

    @property
    def executable(self):
        """(base_addr, image bytes) of the loadable code; built on first use."""
        try:
            return self._exe
        except AttributeError:
            # Not built yet: build once and cache
            self._exe = self._build_executable()
            return self._exe

    def _build_executable(self):
        """Concatenate section 1 and, if contiguous, sections 2 and 3.

        Returns (base_addr, img), where base_addr is section 1's sh_addr.
        A section is appended only if it has flag bit 2 set (SHF_ALLOC in
        the ELF specification) and type SHT_PROGBITS. Raises Exception when
        such a section does not start right after the data gathered so far.
        """
        s = self.elf.get_section(1)
        assert s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS"
        base_addr = s.header["sh_addr"]

        img = s.data()

        s = self.elf.get_section(2)
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #2 should follow #1")

            img += s.data()

            s = self.elf.get_section(3)
            print("%s" % str(s.header))
            if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
                if s.header["sh_addr"] != base_addr + len(img):
                    raise Exception("bad section vaddr - #3 should follow #2")

                img += s.data()

        return (base_addr, img)

    def _read_location(self, die, location_lists):
        """Return the DIE's DW_AT_location, or [] if it cannot be read.

        Attributes of form DW_FORM_sec_offset / DW_FORM_data4 are treated as
        offsets into the location lists; any other form is returned as the
        raw attribute value.
        """
        try:
            attr = die.attributes['DW_AT_location']
            if attr.form in ('DW_FORM_sec_offset', 'DW_FORM_data4'):
                return location_lists.get_location_list_at_offset(attr.value)
            return attr.value
        except Exception:
            # Best effort: a missing or unreadable location is recorded as
            # "no location" instead of aborting the whole index.
            return []

    def _describe_function(self, d, location_lists):
        """Return (name, record) for a DW_TAG_subprogram DIE, or None.

        None is returned for declarations and for DIEs lacking a name or a
        PC range, which cannot be indexed. The record holds "lpc", "hpc",
        "args", "vars" and, when the DIE has a frame base, "fb".
        """
        attrs = d.attributes
        if 'DW_AT_declaration' in attrs:
            return None
        for required in ('DW_AT_name', 'DW_AT_low_pc', 'DW_AT_high_pc'):
            if required not in attrs:
                return None

        lpc = attrs['DW_AT_low_pc'].value
        hpc = attrs['DW_AT_high_pc'].value
        if hpc < lpc:
            # presumably DW_AT_high_pc is encoded as an offset from low_pc
            # here -- heuristic kept from the original; TODO confirm
            hpc += lpc

        f = {"lpc": lpc, "hpc": hpc, "args": {}, "vars": {}}
        if 'DW_AT_frame_base' in attrs:
            a = attrs['DW_AT_frame_base']
            if a.form in ('DW_FORM_data4', 'DW_FORM_sec_offset'):
                f["fb"] = location_lists.get_location_list_at_offset(a.value)
            else:
                f["fb"] = a.value

        buckets = {
            "DW_TAG_formal_parameter": f["args"],
            "DW_TAG_variable": f["vars"],
        }
        for child in d.iter_children():
            bucket = buckets.get(child.tag)
            if bucket is None:
                continue
            name_attr = child.attributes.get('DW_AT_name')
            if name_attr is None:
                # Unnamed parameters/locals cannot be keyed by name
                continue
            bucket[name_attr.value] = {
                "location": self._read_location(child, location_lists),
            }

        return attrs['DW_AT_name'].value, f

    def __tame_dwarf(self):
        """Build the lookup tables described in the class docstring."""
        dw = self.dwarf
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None

        location_lists = dw.location_lists()

        # Prefer EH CFI over .debug_frame CFI when both exist
        cfi = None
        if dw.has_EH_CFI():
            cfi = dw.EH_CFI_entries()
            print("we have EH CFI entries")
        elif dw.has_CFI():
            cfi = dw.CFI_entries()
            print("we have CFI entries")
        else:
            print("no (EH) CFI")

        if cfi is not None:
            self._cfa_rule = {}
            for c in cfi:
                try:
                    decoded = c.get_decoded()
                except Exception:
                    # Best effort: keep the rules decoded so far
                    print("CFI decoding exception")
                    break

                for entry in decoded.table:
                    pc = entry["pc"]
                    if pc in self._cfa_rule:
                        print("duplicate cfa rule found at pc %x" % pc)
                        print("\t%s" % str(self._cfa_rule[pc]))
                        print("\t%s" % str(entry))
                        print("")
                    # The last rule seen for a pc wins
                    self._cfa_rule[pc] = entry

        for c in dw.iter_CUs():
            functions = {}
            variables = {}

            td = c.get_top_DIE()

            for d in td.iter_children():
                if d.tag == 'DW_TAG_subprogram':
                    described = self._describe_function(d, location_lists)
                    if described is not None:
                        function_name, f = described
                        functions[function_name] = f
                elif d.tag == 'DW_TAG_variable':
                    # Only variables declared in file #1 of the CU are kept
                    decl_file = d.attributes.get('DW_AT_decl_file')
                    if decl_file is None or decl_file.value != 1:
                        continue

                    name_attr = d.attributes.get('DW_AT_name')
                    try:
                        name = name_attr.value
                    except AttributeError:
                        # Missing or odd name attribute: use a printable
                        # placeholder instead of failing a second time
                        name = '(%s)' % str(name_attr)

                    try:
                        location = d.attributes['DW_AT_location'].value
                    except (KeyError, AttributeError):
                        location = []
                    variables[name] = {"location": location}

            fname = td.attributes['DW_AT_name'].value
            x = {
                "line_program": dw.line_program_for_CU(c).get_entries(),
                "lpc": td.attributes['DW_AT_low_pc'].value,
                "hpc": td.attributes['DW_AT_high_pc'].value,
                "comp_dir": td.attributes['DW_AT_comp_dir'].value,
                "functions": functions,
                "variables": variables,
            }

            self._compile_units[fname] = x
            if ((self._lowest_known_address is None) or
                    (self._lowest_known_address > x["lpc"])):
                self._lowest_known_address = x["lpc"]

        for c, unit in self._compile_units.items():
            lines = unit["lines"] = {}
            for line in unit["line_program"]:
                state = line.state
                if state is None or (state.end_sequence or state.basic_block or
                                     state.epilogue_begin or state.prologue_end):
                    continue
                cl = "%s+%d" % (c, state.line)
                known = self._addresses.get(state.address)
                if known is not None and known != cl:
                    raise Exception("addr %x is both \"%s\" and \"%s+%d\"" % (
                        state.address, known, c, state.line))
                self._addresses[state.address] = cl
                lines.setdefault(state.line, []).append(state.address)

        if cfi is not None:
            print("CFA table:")
            for pc in sorted(self._cfa_rule):
                print("%x: %s\t\t(%s)" % (
                    pc, str(self._cfa_rule[pc]), self.addr2line(pc)))

    def addr2line(self, addr):
        """Return "<cu name>+<line>" for addr, or '' if addr is not indexed."""
        return self._addresses.get(addr, '')

    def loc_at(self, addr):
        """Return (function, cu name, line, comp_dir) for the code at addr.

        If addr itself is not indexed, the search walks down in steps of 4
        until an indexed address is found or the lowest known address is
        passed. Returns ("unknown", "", 0, "") when nothing is found; on
        success the line is returned as a string and the function name is
        "" when no function range of the CU contains the address.
        """
        line = self.addr2line(addr)
        lowest = self._lowest_known_address
        # `lowest` is None when no compile unit was indexed; without this
        # check the loop could never terminate.
        # NOTE(review): the step of 4 assumes 4-byte instruction alignment
        # -- confirm for the target architecture.
        while '' == line and lowest is not None and addr >= lowest:
            addr -= 4
            line = self.addr2line(addr)
        if '' == line:
            return ("unknown", "", 0, "")

        # Split at the LAST '+': the CU name itself may contain one
        cuname, culine = line.rsplit("+", 1)
        fname = ""
        c = self._compile_units[cuname]
        for name, func in c["functions"].items():
            if func["lpc"] <= addr <= func["hpc"]:
                fname = name
                break
        return (fname, cuname, culine, c["comp_dir"])

    def line2addr(self, fname, line):
        """Return the addresses recorded for `line` of compile unit `fname`.

        Raises KeyError if the compile unit or the line is not indexed.
        """
        return self._compile_units[fname]["lines"][line]
Ejemplo n.º 56
0
def test_files(fns,quiet=False,profile=False,runtime=False):
  """Run the function-discovery engine over each file and report the result.

  fns:      iterable of file paths; directories, non-ELF files and ELF files
            of an unsupported architecture are skipped
  quiet:    suppress the "skipping ..." and "ran without exceptions" notices
  profile:  run the engine under pycallgraph, writing prof.png
            (the module-level args.profile is honoured as well)
  runtime:  only check that the engine runs; skip the comparison against
            the DWARF function list
  """
  for fn in fns:
    short_fn = fn.split("/")[-1]
    if os.path.isdir(fn):
      if not quiet:
        print("{} {}: skipping directory".format(notice, short_fn))
      continue
    # The ELF object reads from the stream for as long as it is used, so the
    # whole per-file check runs inside the `with` that closes the handle.
    with open(fn, "rb") as stream:
      _check_one_file(fn, short_fn, stream, quiet, profile, runtime)


def _check_one_file(fn, short_fn, stream, quiet, profile, runtime):
  """Check a single file whose contents are readable through `stream`."""
  try:
    elf = ELFFile(stream)
  except ELFError:
    if not quiet:
      print("{} {}: skipping non-ELF file".format(notice, short_fn))
    return

  arch = elf['e_machine']
  if arch not in SUPPORTED:
    if not quiet:
      print("{} {}: skipping ELF with unsupported architecture `{}`".format(notice, short_fn, arch))
    return

  engine_functions = {}
  engine = "builtin"
  try:
    this_engine = Static(fn, debug=0) #no debug output
    # The original consulted only the global args.profile and ignored the
    # `profile` parameter; both are honoured to stay backward-compatible.
    if profile or args.profile:
      #needs pycallgraph
      from pycallgraph import PyCallGraph
      from pycallgraph.output import GraphvizOutput
      graphviz = GraphvizOutput()
      graphviz.output_file = 'prof.png'
      with PyCallGraph(output=graphviz):
        this_engine.process()
    else:
      this_engine.process()
    engine_functions[engine] = {x.start for x in this_engine['functions']}
  except KeyboardInterrupt:
    print("{} User stopped processing test cases.".format(notice))
    sys.exit()
  except MemoryError:
    # Skipped without a message, as in the original
    return
  except Exception as e:
    print("{} {}: {} engine failed to process file with `{}'".format(fail, short_fn, engine, e))
    return

  if runtime:
    if not quiet:
      print("{} {}: {} ran without exceptions".format(ok_green, short_fn, engine))
    return

  if elf.has_dwarf_info():
    # DWARF is the ground truth: report the functions the engine missed
    dwarfinfo = elf.get_dwarf_info()
    dwarf_functions = get_functions(dwarfinfo)
    for engine,functions in engine_functions.items():
      missed = dwarf_functions - functions
      total_fxns = len(dwarf_functions)
      if len(missed) == 0:
        print("{} {}: {} engine found all {} function(s)".format(ok_green,
                                                                 short_fn,
                                                                 engine,
                                                                 total_fxns))
      else:
        status = fail if len(missed) == total_fxns else warn
        if args.verbose:
          fmt = "{} {}: {} engine missed {}/{} function(s): {}"
          missed_s = ", ".join(hex(fxn) for fxn in missed)
          print(fmt.format(status, short_fn, engine,
                  len(missed), total_fxns, missed_s))
        else:
          fmt = "{} {}: {} engine missed {}/{} function(s)"
          print(fmt.format(status, short_fn, engine,
                  len(missed), total_fxns))
  else:
    # Nothing to compare against: just report how many functions were found
    for engine,functions in engine_functions.items():
      status = fail if len(functions) == 0 else ok_blue
      print("{} {}: {} engine found {} function(s). (dwarf info unavailable)".format(status, short_fn, engine, len(functions)))
Ejemplo n.º 57
0
class ReadElf(object):
    """ display_* methods are used to emit output into the output stream
    """
    def __init__(self, file, output):
        """ Set up a reader over an ELF stream.

            file:
                stream object with the ELF file to read

            output:
                output stream to write to
        """
        # DWARF info is loaded lazily, only once a debug dump is requested
        self._dwarfinfo = None
        self.output = output
        self.elffile = ELFFile(file)

    def display_file_header(self):
        """ Display the ELF file header, one labelled field per line
        """
        emit = self._emit
        emitline = self._emitline

        emitline('ELF Header:')
        emit('  Magic:   ')
        magic_bytes = ['%2.2x' % byte2int(b) for b in self.elffile.e_ident_raw]
        emitline(' '.join(magic_bytes))

        header = self.elffile.header
        e_ident = header['e_ident']

        # Identification fields that go through a describe_* helper
        for label, describe, key in (
                ('  Class:                             %s', describe_ei_class, 'EI_CLASS'),
                ('  Data:                              %s', describe_ei_data, 'EI_DATA'),
                ('  Version:                           %s', describe_ei_version, 'EI_VERSION'),
                ('  OS/ABI:                            %s', describe_ei_osabi, 'EI_OSABI')):
            emitline(label % describe(e_ident[key]))
        emitline('  ABI Version:                       %d' %
                e_ident['EI_ABIVERSION'])

        # Main header fields that go through a describe_* helper
        for label, describe, key in (
                ('  Type:                              %s', describe_e_type, 'e_type'),
                ('  Machine:                           %s', describe_e_machine, 'e_machine'),
                ('  Version:                           %s', describe_e_version_numeric, 'e_version')):
            emitline(label % describe(header[key]))

        emitline('  Entry point address:               %s' %
                self._format_hex(header['e_entry']))

        # The two table offsets share one line with a fixed suffix
        for label, key in (
                ('  Start of program headers:          %s', 'e_phoff'),
                ('  Start of section headers:          %s', 'e_shoff')):
            emit(label % header[key])
            emitline(' (bytes into file)')

        emitline('  Flags:                             %s' %
                self._format_hex(header['e_flags']))

        # Remaining fields are printed as they are stored in the header
        for label, key in (
                ('  Size of this header:               %s (bytes)', 'e_ehsize'),
                ('  Size of program headers:           %s (bytes)', 'e_phentsize'),
                ('  Number of program headers:         %s', 'e_phnum'),
                ('  Size of section headers:           %s (bytes)', 'e_shentsize'),
                ('  Number of section headers:         %s', 'e_shnum'),
                ('  Section header string table index: %s', 'e_shstrndx')):
            emitline(label % header[key])

    def display_program_headers(self, show_heading=True):
        """ Display the ELF program headers, followed by the mapping of
            sections to segments (when the file has sections).
            If show_heading is True, the table is preceded by the file type,
            the entry point and the number/offset of the program headers.
        """
        elf = self.elffile
        emit = self._emit
        emitline = self._emitline
        to_hex = self._format_hex

        emitline()
        if elf.num_segments() == 0:
            emitline('There are no program headers in this file.')
            return

        elfheader = elf.header
        if show_heading:
            emitline('Elf file type is %s' %
                describe_e_type(elfheader['e_type']))
            emitline('Entry point is %s' % to_hex(elfheader['e_entry']))
            # Like readelf, e_phoff is printed in decimal here (for section
            # headers the offset is printed as hex)
            emitline('There are %s program headers, starting at offset %s' % (
                elfheader['e_phnum'], elfheader['e_phoff']))
            emitline()

        emitline('Program Headers:')

        # 32-bit and 64-bit addresses have different formatting constraints,
        # so both the heading and the rows depend on the ELF class.
        is_32bit = elf.elfclass == 32
        if is_32bit:
            emitline('  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align')
        else:
            emitline('  Type           Offset             VirtAddr           PhysAddr')
            emitline('                 FileSiz            MemSiz              Flags  Align')

        # One row per segment (two physical lines in 64-bit mode)
        for segment in elf.iter_segments():
            emit('  %-14s ' % describe_p_type(segment['p_type']))

            if is_32bit:
                offset = to_hex(segment['p_offset'], fieldsize=6)
                vaddr = to_hex(segment['p_vaddr'], fullhex=True)
                paddr = to_hex(segment['p_paddr'], fullhex=True)
                filesz = to_hex(segment['p_filesz'], fieldsize=5)
                memsz = to_hex(segment['p_memsz'], fieldsize=5)
                flags = describe_p_flags(segment['p_flags'])
                align = to_hex(segment['p_align'])
                emitline('%s %s %s %s %s %-3s %s' % (
                    offset, vaddr, paddr, filesz, memsz, flags, align))
            else: # 64
                offset = to_hex(segment['p_offset'], fullhex=True)
                vaddr = to_hex(segment['p_vaddr'], fullhex=True)
                paddr = to_hex(segment['p_paddr'], fullhex=True)
                emitline('%s %s %s' % (offset, vaddr, paddr))

                filesz = to_hex(segment['p_filesz'], fullhex=True)
                memsz = to_hex(segment['p_memsz'], fullhex=True)
                flags = describe_p_flags(segment['p_flags'])
                # p_align is printed without the 0x prefix in 64-bit mode,
                # to mimic readelf
                align = to_hex(segment['p_align'], lead0x=False)
                emitline('                 %s %s  %-3s    %s' % (
                    filesz, memsz, flags, align))

            if isinstance(segment, InterpSegment):
                emitline('      [Requesting program interpreter: %s]' %
                    bytes2str(segment.get_interp_name()))

        # Sections to segments mapping; nothing to map without sections
        if elf.num_sections() == 0:
            return

        emitline('\n Section to Segment mapping:')
        emitline('  Segment Sections...')

        for nseg, segment in enumerate(elf.iter_segments()):
            emit('   %2.2d     ' % nseg)

            for section in elf.iter_sections():
                if section.is_null():
                    continue
                if segment.section_in_segment(section):
                    emit('%s ' % bytes2str(section.name))

            emitline('')

    def display_section_headers(self, show_heading=True):
        """ Display the ELF section headers, followed by the key to the
            flag letters.
            If show_heading is True, the table is preceded by a line with
            the number of section headers and their file offset.
        """
        elf = self.elffile
        emitline = self._emitline
        to_hex = self._format_hex

        elfheader = elf.header
        if show_heading:
            emitline('There are %s section headers, starting at offset %s' % (
                elfheader['e_shnum'], to_hex(elfheader['e_shoff'])))

        plural = 's' if elfheader['e_shnum'] > 1 else ''
        emitline('\nSection Header%s:' % plural)

        # Different formatting constraints of 32-bit and 64-bit addresses
        is_32bit = elf.elfclass == 32
        if is_32bit:
            emitline('  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al')
        else:
            emitline('  [Nr] Name              Type             Address           Offset')
            emitline('       Size              EntSize          Flags  Link  Info  Align')

        # One row per section (two physical lines in 64-bit mode)
        for nsec, section in enumerate(elf.iter_sections()):
            self._emit('  [%2u] %-17.17s %-15.15s ' % (
                nsec, bytes2str(section.name), describe_sh_type(section['sh_type'])))

            if is_32bit:
                addr = to_hex(section['sh_addr'], fieldsize=8, lead0x=False)
                offset = to_hex(section['sh_offset'], fieldsize=6, lead0x=False)
                size = to_hex(section['sh_size'], fieldsize=6, lead0x=False)
                entsize = to_hex(section['sh_entsize'], fieldsize=2, lead0x=False)
                emitline('%s %s %s %s %3s %2s %3s %2s' % (
                    addr, offset, size, entsize,
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))
            else: # 64
                addr = to_hex(section['sh_addr'], fullhex=True, lead0x=False)
                # Offsets that do not fit in 32 bits get a wider field
                offset_width = 16 if section['sh_offset'] > 0xffffffff else 8
                offset = to_hex(section['sh_offset'],
                    fieldsize=offset_width, lead0x=False)
                emitline(' %s  %s' % (addr, offset))

                size = to_hex(section['sh_size'], fullhex=True, lead0x=False)
                entsize = to_hex(section['sh_entsize'], fullhex=True, lead0x=False)
                emitline('       %s  %s %3s      %2s   %3s     %s' % (
                    size, entsize,
                    describe_sh_flags(section['sh_flags']),
                    section['sh_link'], section['sh_info'],
                    section['sh_addralign']))

        emitline('Key to Flags:')
        self._emit('  W (write), A (alloc), X (execute), M (merge), S (strings)')
        # The "large" flag is only listed for these machine types
        if elf['e_machine'] in ('EM_X86_64', 'EM_L10M'):
            emitline(', l (large)')
        else:
            emitline()
        emitline('  I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)')
        emitline('  O (extra OS processing required) o (OS specific), p (processor specific)')

    def display_symbol_tables(self):
        """ Print every symbol table section of the file, one row per symbol.

            A symbol table whose sh_entsize is zero is only reported, its
            entries are not listed.
        """
        heading_32 = '   Num:    Value  Size Type    Bind   Vis      Ndx Name'
        heading_64 = '   Num:    Value          Size Type    Bind   Vis      Ndx Name'
        # %.25s: symbol names are truncated to 25 chars, similarly to readelf
        row_format = '%6d: %s %5d %-7s %-6s %-7s %4s %.25s'

        for symtab in self.elffile.iter_sections():
            if not isinstance(symtab, SymbolTableSection):
                continue

            entsize_is_zero = symtab['sh_entsize'] == 0
            table_name = bytes2str(symtab.name)
            if entsize_is_zero:
                self._emitline(
                    "\nSymbol table '%s' has a sh_entsize of zero!" % (
                        table_name))
                continue

            self._emitline("\nSymbol table '%s' contains %s entries:" % (
                table_name, symtab.num_symbols()))
            self._emitline(
                heading_32 if self.elffile.elfclass == 32 else heading_64)

            for index, sym in enumerate(symtab.iter_symbols()):
                value_hex = self._format_hex(
                    sym['st_value'], fullhex=True, lead0x=False)
                size = sym['st_size']
                info = sym['st_info']
                self._emitline(row_format % (
                    index,
                    value_hex,
                    size,
                    describe_symbol_type(info['type']),
                    describe_symbol_bind(info['bind']),
                    describe_symbol_visibility(sym['st_other']['visibility']),
                    describe_symbol_shndx(sym['st_shndx']),
                    bytes2str(sym.name)))

    def display_relocations(self):
        """ Print the entries of every relocation section in the file, or a
            notice when the file has no relocation sections at all.
        """
        rela_heading = "  Offset          Info           Type           Sym. Value    Sym. Name + Addend"
        rel_heading = " Offset     Info    Type            Sym.Value  Sym. Name"

        seen_any = False
        for relsec in self.elffile.iter_sections():
            if not isinstance(relsec, RelocationSection):
                continue
            seen_any = True

            self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % (
                bytes2str(relsec.name),
                self._format_hex(relsec['sh_offset']),
                relsec.num_relocations()))

            with_addend = relsec.is_RELA()
            self._emitline(rela_heading if with_addend else rel_heading)

            # sh_link holds the index of the symbol table section used by
            # this relocation section
            symtable = self.elffile.get_section(relsec['sh_link'])

            # Column widths depend only on the ELF class of the file
            is_32bit = self.elffile.elfclass == 32
            width = 8 if is_32bit else 12
            name_padding = '  ' if is_32bit else ''

            for rel in relsec.iter_relocations():
                offset_hex = self._format_hex(
                    rel['r_offset'], fieldsize=width, lead0x=False)
                info_hex = self._format_hex(
                    rel['r_info'], fieldsize=width, lead0x=False)
                type_name = describe_reloc_type(
                    rel['r_info_type'], self.elffile)
                self._emit('%s  %s %-17.17s' % (
                    offset_hex, info_hex, type_name))

                sym_index = rel['r_info_sym']
                if sym_index != 0:
                    symbol = symtable.get_symbol(sym_index)
                    if symbol['st_name'] == 0:
                        # A zero 'st_name' means the symbol has no name of
                        # its own; show the name of the section it points at
                        shown_name = self.elffile.get_section(
                            symbol['st_shndx']).name
                    else:
                        shown_name = symbol.name

                    value_hex = self._format_hex(
                        symbol['st_value'], fullhex=True, lead0x=False)
                    self._emit(' %s %s%22.22s' % (
                        value_hex, name_padding, bytes2str(shown_name)))

                    if with_addend:
                        addend = rel['r_addend']
                        sign = '+' if addend >= 0 else '-'
                        self._emit(' %s %x' % (sign, abs(addend)))

                # Terminates the row, whether or not a symbol was shown
                self._emitline()

        if not seen_any:
            self._emitline('\nThere are no relocations in this file.')

    def display_hex_dump(self, section_spec):
        """ Print the contents of a section as hex, 16 bytes per line, with
            the printable characters of each line shown next to it.
            section_spec is either a section number or a name.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nHex dump of section '%s':" % bytes2str(section.name))
        self._note_relocs_for_section(section)
        base_addr = section['sh_addr']
        data = section.data()
        total = len(data)

        for start in range(0, total, 16):
            # The final line may hold fewer than 16 bytes
            count = min(16, total - start)

            self._emit('  %s ' % self._format_hex(
                base_addr + start, fieldsize=8))

            # Hex columns: always 16 slots, blank-padded past the data, with
            # a space after every group of 4
            for col in range(16):
                if col < count:
                    self._emit('%2.2x' % byte2int(data[start + col]))
                else:
                    self._emit('  ')
                if col % 4 == 3:
                    self._emit(' ')

            # Character columns: bytes outside 32..0x7e are shown as '.'
            for col in range(count):
                char = data[start + col : start + col + 1]
                code = byte2int(char[0])
                self._emit(bytes2str(char if 32 <= code < 0x7f else b'.'))

            self._emitline()

        self._emitline()

    def display_string_dump(self, section_spec):
        """ Display a strings dump of a section. section_spec is either a
            section number or a name.

            A string starts at the first printable ASCII byte (32..126) and
            runs up to, but not including, the next zero byte or the end of
            the section data. Each string is printed together with the hex
            offset of its first byte within the section data. A notice is
            printed instead when the section holds no strings, or when the
            section does not exist.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            self._emitline("Section '%s' does not exist in the file!" % (
                section_spec))
            return

        self._emitline("\nString dump of section '%s':" % bytes2str(section.name))

        found = False
        data = section.data()
        datalen = len(data)
        dataptr = 0

        while dataptr < datalen:
            # Skip bytes that cannot start a string. DEL (127) is a control
            # character, so it is skipped as well; this matches the
            # printable range used by display_hex_dump (32 <= b < 0x7f).
            while ( dataptr < datalen and
                    not (32 <= byte2int(data[dataptr]) < 127)):
                dataptr += 1

            if dataptr >= datalen:
                break

            # The string extends to the next zero byte (or the end of data)
            endptr = dataptr
            while endptr < datalen and byte2int(data[endptr]) != 0:
                endptr += 1

            found = True
            self._emitline('  [%6x]  %s' % (
                dataptr, bytes2str(data[dataptr:endptr])))

            # endptr sits on the terminator (or at the end); the terminator
            # is consumed by the skip loop of the next iteration
            dataptr = endptr

        if not found:
            self._emitline('  No strings found in this section.')
        else:
            self._emitline()

    def display_debug_dump(self, dump_what):
        """ Dump one kind of DWARF information, selected by dump_what.

            Nothing is printed when the file has no DWARF info. A dump_what
            that is not one of the supported names results in a "not yet
            supported" message.
        """
        self._init_dwarfinfo()
        if self._dwarfinfo is None:
            return

        set_global_machine_arch(self.elffile.get_machine_arch())

        # Supported dump kinds and the method implementing each of them
        dumpers = (
            ('info', '_dump_debug_info'),
            ('decodedline', '_dump_debug_line_programs'),
            ('frames', '_dump_debug_frames'),
            ('frames-interp', '_dump_debug_frames_interp'),
        )
        for kind, method_name in dumpers:
            if dump_what == kind:
                getattr(self, method_name)()
                return

        self._emitline('debug dump not yet supported for "%s"' % dump_what)

    def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True):
        """ Format an address into a hexadecimal string.

            fieldsize:
                Size of the hexadecimal field (with leading zeros to fit the
                address into. For example with fieldsize=8, the format will
                be %08x
                If None, the minimal required field size will be used.

            fullhex:
                If True, override fieldsize to set it to the maximal size
                needed for the elfclass

            lead0x:
                If True, leading 0x is added
        """
        s = '0x' if lead0x else ''
        if fullhex:
            fieldsize = 8 if self.elffile.elfclass == 32 else 16
        if fieldsize is None:
            field = '%x'
        else:
            field = '%' + '0%sx' % fieldsize
        return s + field % addr

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
            if num < self.elffile.num_sections():
                return self.elffile.get_section(num)
            else:
                return None
        except ValueError:
            # Not a number. Must be a name then
            return self.elffile.get_section_by_name(str2bytes(spec))

    def _note_relocs_for_section(self, section):
        """ If there are relocation sections pointing to the givne section,
            emit a note about it.
        """
        for relsec in self.elffile.iter_sections():
            if isinstance(relsec, RelocationSection):
                info_idx = relsec['sh_info']
                if self.elffile.get_section(info_idx) == section:
                    self._emitline('  Note: This section has relocations against it, but these have NOT been applied to this dump.')
                    return

    def _init_dwarfinfo(self):
        """ Initialize the DWARF info contained in the file and assign it to
            self._dwarfinfo.
            Leave self._dwarfinfo at None if no DWARF info was found in the file
        """
        if self._dwarfinfo is not None:
            return

        if self.elffile.has_dwarf_info():
            self._dwarfinfo = self.elffile.get_dwarf_info()
        else:
            self._dwarfinfo = None

    def _dump_debug_info(self):
        """ Dump the debugging info section.
        """
        self._emitline('Contents of the .debug_info section:\n')

        # Offset of the .debug_info section in the stream
        section_offset = self._dwarfinfo.debug_info_sec.global_offset

        for cu in self._dwarfinfo.iter_CUs():
            self._emitline('  Compilation Unit @ offset %s:' %
                self._format_hex(cu.cu_offset))
            self._emitline('   Length:        %s (%s)' % (
                self._format_hex(cu['unit_length']),
                '%s-bit' % cu.dwarf_format()))
            self._emitline('   Version:       %s' % cu['version']),
            self._emitline('   Abbrev Offset: %s' % cu['debug_abbrev_offset']),
            self._emitline('   Pointer Size:  %s' % cu['address_size'])

            # The nesting depth of each DIE within the tree of DIEs must be
            # displayed. To implement this, a counter is incremented each time
            # the current DIE has children, and decremented when a null die is
            # encountered. Due to the way the DIE tree is serialized, this will
            # correctly reflect the nesting depth
            #
            die_depth = 0
            for die in cu.iter_DIEs():
                if die.is_null():
                    die_depth -= 1
                    continue
                self._emitline(' <%s><%x>: Abbrev Number: %s (%s)' % (
                    die_depth,
                    die.offset,
                    die.abbrev_code,
                    die.tag))

                for attr in itervalues(die.attributes):
                    name = attr.name
                    # Unknown attribute values are passed-through as integers
                    if isinstance(name, int):
                        name = 'Unknown AT value: %x' % name
                    self._emitline('    <%2x>   %-18s: %s' % (
                        attr.offset,
                        name,
                        describe_attr_value(
                            attr, die, section_offset)))

                if die.has_children:
                    die_depth += 1

        self._emitline()

    def _dump_debug_line_programs(self):
        """ Dump the (decoded) line programs from .debug_line
            The programs are dumped in the order of the CUs they belong to.
        """
        self._emitline('Decoded dump of debug contents of section .debug_line:\n')

        for cu in self._dwarfinfo.iter_CUs():
            lineprogram = self._dwarfinfo.line_program_for_CU(cu)

            cu_filename = ''
            if len(lineprogram['include_directory']) > 0:
                cu_filename = '%s/%s' % (
                    bytes2str(lineprogram['include_directory'][0]),
                    bytes2str(lineprogram['file_entry'][0].name))
            else:
                cu_filename = bytes2str(lineprogram['file_entry'][0].name)

            self._emitline('CU: %s:' % cu_filename)
            self._emitline('File name                            Line number    Starting address')

            # Print each state's file, line and address information. For some
            # instructions other output is needed to be compatible with
            # readelf.
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is None:
                    # Special handling for commands that don't set a new state
                    if entry.command == DW_LNS_set_file:
                        file_entry = lineprogram['file_entry'][entry.args[0] - 1]
                        if file_entry.dir_index == 0:
                            # current directory
                            self._emitline('\n./%s:[++]' % (
                                bytes2str(file_entry.name)))
                        else:
                            self._emitline('\n%s/%s:' % (
                                bytes2str(lineprogram['include_directory'][file_entry.dir_index - 1]),
                                bytes2str(file_entry.name)))
                    elif entry.command == DW_LNE_define_file:
                        self._emitline('%s:' % (
                            bytes2str(lineprogram['include_directory'][entry.args[0].dir_index])))
                elif not state.end_sequence:
                    # readelf doesn't print the state after end_sequence
                    # instructions. I think it's a bug but to be compatible
                    # I don't print them too.
                    self._emitline('%-35s  %11d  %18s' % (
                        bytes2str(lineprogram['file_entry'][state.file - 1].name),
                        state.line,
                        '0' if state.address == 0 else
                               self._format_hex(state.address)))
                if entry.command == DW_LNS_copy:
                    # Another readelf oddity...
                    self._emitline()

    def _dump_debug_frames(self):
        """ Dump the raw frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return
        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE' % (
                    entry.offset, entry['length'], entry['CIE_id']))
                self._emitline('  Version:               %d' % entry['version'])
                self._emitline('  Augmentation:          "%s"' % bytes2str(entry['augmentation']))
                self._emitline('  Code alignment factor: %u' % entry['code_alignment_factor'])
                self._emitline('  Data alignment factor: %d' % entry['data_alignment_factor'])
                self._emitline('  Return address column: %d' % entry['return_address_register'])
                self._emitline()
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))

            self._emit(describe_CFI_instructions(entry))
        self._emitline()

    def _dump_debug_frames_interp(self):
        """ Dump the interpreted (decoded) frame information from .debug_frame
        """
        if not self._dwarfinfo.has_CFI():
            return

        self._emitline('Contents of the .debug_frame section:')

        for entry in self._dwarfinfo.CFI_entries():
            if isinstance(entry, CIE):
                self._emitline('\n%08x %08x %08x CIE "%s" cf=%d df=%d ra=%d' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_id'],
                    bytes2str(entry['augmentation']),
                    entry['code_alignment_factor'],
                    entry['data_alignment_factor'],
                    entry['return_address_register']))
                ra_regnum = entry['return_address_register']
            else: # FDE
                self._emitline('\n%08x %08x %08x FDE cie=%08x pc=%08x..%08x' % (
                    entry.offset,
                    entry['length'],
                    entry['CIE_pointer'],
                    entry.cie.offset,
                    entry['initial_location'],
                    entry['initial_location'] + entry['address_range']))
                ra_regnum = entry.cie['return_address_register']

            # Print the heading row for the decoded table
            self._emit('   LOC')
            self._emit('  ' if entry.structs.address_size == 4 else '          ')
            self._emit(' CFA      ')

            # Decode the table nad look at the registers it describes.
            # We build reg_order here to match readelf's order. In particular,
            # registers are sorted by their number, and the register matching
            # ra_regnum is always listed last with a special heading.
            decoded_table = entry.get_decoded()
            reg_order = sorted(ifilter(
                lambda r: r != ra_regnum,
                decoded_table.reg_order))

            # Headings for the registers
            for regnum in reg_order:
                self._emit('%-6s' % describe_reg_name(regnum))
            self._emitline('ra      ')

            # Now include ra_regnum in reg_order to print its values similarly
            # to the other registers.
            reg_order.append(ra_regnum)
            for line in decoded_table.table:
                self._emit(self._format_hex(
                    line['pc'], fullhex=True, lead0x=False))
                self._emit(' %-9s' % describe_CFI_CFA_rule(line['cfa']))

                for regnum in reg_order:
                    if regnum in line:
                        s = describe_CFI_register_rule(line[regnum])
                    else:
                        s = 'u'
                    self._emit('%-6s' % s)
                self._emitline()
        self._emitline()

    def _emit(self, s=''):
        """ Emit an object to output
        """
        self.output.write(str(s))

    def _emitline(self, s=''):
        """ Emit an object to output, followed by a newline
        """
        self.output.write(str(s) + '\n')