Ejemplo n.º 1
0
def main():
    """Print a padding report for the ELF object file named in ``sys.argv[1]``.

    For every compile unit, each DIE whose tag is a structure, class, base
    type, typedef or array is wrapped in the matching model class and stored
    in a table keyed by DIE offset. After each compile unit the whole table is
    finalized and printed, followed by the padding details of every type that
    reports padding.

    Exits with status 1 when the file carries no DWARF info.
    """
    # DWARF tag -> wrapper class. Built once up front because it does not
    # depend on the DIE being visited.
    common_types = {
        'DW_TAG_structure_type': Struct,
        'DW_TAG_class_type': Struct,
        'DW_TAG_base_type': Primitive,
        'DW_TAG_typedef': Typedef,
        'DW_TAG_array_type': Array,
    }

    with open(sys.argv[1], 'rb') as f:
        elf = ELFFile(f)
        if not elf.has_dwarf_info():
            print("Object file has no dwarf info!")
            sys.exit(1)

        # Offset -> wrapped type. The table is shared by all compile units, so
        # each report below also lists types of units processed earlier.
        types = {}

        dwarf_info = elf.get_dwarf_info()
        for cu in dwarf_info.iter_CUs():
            cu_name = cu.get_top_DIE().attributes['DW_AT_name'].value.decode(
                'utf-8')
            print('\x1b[32m\x1b[1mProcessing %s\x1b[0m' % cu_name)

            # Materialize the DIE list before constructing any wrapper, then
            # wrap every DIE whose tag has a model class; other tags are
            # skipped silently.
            for die in list(cu.iter_DIEs()):
                wrapper = common_types.get(die.tag)
                if wrapper is None:
                    continue
                assert die.offset not in types
                types[die.offset] = wrapper(die)

            for t in types.values():
                t.finalize(types)

            header = '%-4s |\t%-100s |\t%s' % ('#', 'type', 'size')
            rule = '-' * len(header.expandtabs())
            print(header)
            print(rule)
            for o, t in types.items():
                # Padded types are highlighted in red.
                color = '\x1b[31m\x1b[31m' if t.has_padding() else ''
                print('{:04x} |\t{color}{:100}\x1b[0m |\t{}'.format(
                    o, repr(t), t.byte_size, color=color))
            print(rule)
            print()
            for o, t in types.items():
                if t.has_padding():
                    # NOTE(review): cu_name is the unit currently being
                    # processed, which may differ from the unit that declared
                    # the type -- confirm this is intended.
                    print(
                        "Found padded type '%s' at %s:%u" %
                        (t, cu_name,
                         t.source_object.attributes['DW_AT_decl_line'].value))
                    for p in t.get_padding_list():
                        print('\t%s' % p)
Ejemplo n.º 2
0
def process_file(filename):
    """Collect function, global, type and global-access maps from an ELF file.

    Every compile unit's top DIE is walked twice: first by
    ``die_info_rec_struct`` and then by ``die_info_rec``, both of which fill
    the maps passed to them.

    Returns:
        A 4-tuple ``(func_map, global_map, type_map, global_access_map)``.
        When the file has no DWARF info a message is printed and four empty
        dicts are returned, so callers can unpack the result the same way on
        both paths.
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            # Same arity as the normal return at the end of the function.
            return {}, {}, {}, {}

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()
        func_map, global_map, type_map, struct_map, global_access_map = {}, {}, {}, {}, {}

        for CU in dwarfinfo.iter_CUs():
            # DWARFInfo allows iterating over the compile units contained in
            # the .debug_info section. The first DIE of each unit is the root
            # of that unit's DIE tree.
            top_DIE = CU.get_top_DIE()
            # Scratch dicts handed to the walkers; recreated for every unit.
            variables = {}
            members = {}

            # Struct pass first: struct_map is also passed to the walk below.
            die_info_rec_struct(top_DIE, struct_map, members, global_access_map)
            # Walk the DIEs recursively starting with top_DIE.
            die_info_rec(top_DIE, func_map, global_map, type_map, struct_map, variables, global_access_map)

        return func_map, global_map, type_map, global_access_map
Ejemplo n.º 3
0
def resolve_addr(fn, ip):
    """Resolve address ``ip`` inside ELF file ``fn``.

    The parsed ELF, its line table and its symbol table are cached per file
    name in the module-level ``open_files``, ``lines`` and ``symtables``
    dicts, so each file is parsed at most once.

    Returns:
        A tuple ``(loc, offset, src)``. ``loc`` is element 2 of the symbol
        entry found by ``find_le`` and ``offset`` is ``ip`` minus element 0 of
        that entry; both are None when no entry is found. ``src`` is a
        ``"<pos[2]>:<pos[3]>"`` string built from the line-table entry, or
        None when the file has no line table or no entry is found.
    """
    # The stream is intentionally left open: the cached ELFFile keeps using it.
    if fn not in open_files:
        open_files[fn] = ELFFile(open(fn, 'rb'))
    elffile = open_files[fn]

    line_table_missing = fn not in lines
    if line_table_missing and elffile.has_dwarf_info():
        lines[fn] = build_line_table(elffile.get_dwarf_info())

    if fn not in symtables:
        symtables[fn] = build_symtab(elffile)

    sym = find_le(symtables[fn], ip) if fn in symtables else None
    loc, offset = (sym[2], ip - sym[0]) if sym else (None, None)

    pos = find_le(lines[fn], ip) if fn in lines else None
    src = "%s:%d" % (pos[2], pos[3]) if pos else None

    return loc, offset, src
Ejemplo n.º 4
0
    def get_hex():
        """
        Get byte encodings corresponding to each source code line.

        Reads ``a.out``, takes the line program of its first compile unit and
        replays the entries while reading from the file, appending what is
        read to the slot of the current line.

        Returns a list with ``last_line + 1`` slots, where ``last_line`` is
        the line of the final line-program entry; a line's data is stored at
        index ``line - 1``.
        """
        # Context manager so the handle is released even if parsing fails.
        with open('a.out', 'rb') as f:
            info = ELFFile(f)
            dwarf = info.get_dwarf_info()
            cu = next(dwarf.iter_CUs())
            lp = dwarf.line_program_for_CU(cu).get_entries()

            # Difference between the virtual address and the file offset of
            # .text, used to turn the first entry's argument into a file
            # position.
            textsec = info.get_section_by_name('.text')
            voff = textsec.header['sh_addr'] - textsec.header['sh_offset']

            curr_line = 0
            # NOTE(review): the slots start as str while f.read() on a binary
            # handle returns bytes; on Python 3 the += below would raise
            # TypeError -- confirm the intended interpreter or slot type.
            res = [''] * (lp[-1].state.line + 1)
            # The first entry positions the file; it is removed from the list.
            f.seek(lp.pop(0).args[0] - voff, os.SEEK_SET)

            for e in lp:
                if len(e.args) == 0:
                    curr_line = e.state.line - 1
                elif len(e.args) == 1:
                    # presumably a plain address advance -- TODO confirm
                    res[curr_line] += f.read(e.args[0])
                elif len(e.args) > 1:
                    if e.args[0] == 0:
                        # No line change recorded in args: skip the bytes and
                        # resynchronize with the entry's own line state.
                        f.seek(e.args[1], os.SEEK_CUR)
                        curr_line = e.state.line - 1
                    else:
                        res[curr_line] += f.read(e.args[1])
                        curr_line += e.args[0]
            return res
Ejemplo n.º 5
0
 def _get_impalad_dwarf_info(self):
   """
   Extract compiler details from the DWARF data of the impalad binary.

   Opens self.impalad_path as an ELF file and, when it carries DWARF, inspects the
   top DIE of the first compile unit. Returns the 2-tuple (DW_AT_name value,
   DW_AT_producer value). Any exception raised along the way is logged as a warning;
   values that were not read by then are returned as None.
   """
   # Background material:
   # - be/CMakeLists.txt
   # - gcc(1), especially -grecord-gcc-switches, -g, -ggdb, -gdwarf-2
   # - readelf(1)
   # - general reading about DWARF
   # To explore the data without wading through many bytes, run:
   # readelf --debug-dump=info --dwarf-depth=1 impalad
   # The DWARF lines are long and raw, so they are not reproduced here; see the
   # command above or IMPALA-3501.
   die_name = die_producer = None
   try:
     with open(self.impalad_path, 'rb') as fh:
       impalad_elf = ELFFile(fh)
       if impalad_elf.has_dwarf_info():
         # Only the first CU is needed, so a single item is pulled off the iterator.
         first_cu = next(impalad_elf.get_dwarf_info().iter_CUs())
         attrs = first_cu.get_top_DIE().attributes
         # Assigned one at a time so the name survives a missing producer.
         die_name = attrs['DW_AT_name'].value
         die_producer = attrs['DW_AT_producer'].value
   except Exception as e:
     message = 'Failure to read DWARF info from {0}: {1}'.format(self.impalad_path,
                                                                 str(e))
     LOG.warn(message)
   return die_name, die_producer
Ejemplo n.º 6
0
def process_file(filename):
    """Print a short summary of every compile unit found in an ELF file.

    For each unit the offset, the unit length, the tag of its top DIE and the
    full path reported by that DIE are printed. Prints a notice and returns
    early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo object returned by get_dwarf_info is the entry point
        # for all DWARF processing in pyelftools; it iterates the compile
        # units of the .debug_info section.
        for unit in elf.get_dwarf_info().iter_CUs():
            # Header fields of a compile unit are read via item lookup.
            cu_banner = '  Found a compile unit at offset {0!s}, length {1!s}'
            print(cu_banner.format(unit.cu_offset, unit['unit_length']))

            # The first DIE of a unit describes the unit itself.
            root_die = unit.get_top_DIE()
            print('    Top DIE with tag={0!s}'.format(root_die.tag))

            # The file name of the unit.
            print('    name={0!s}'.format(root_die.get_full_path()))
Ejemplo n.º 7
0
def process_file(filename):
    """Print offset, length, top-DIE tag and full path of each compile unit.

    A notice is printed and nothing else happens when the ELF file has no
    DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as elf_stream:
        elf = ELFFile(elf_stream)

        if elf.has_dwarf_info():
            # DWARFInfo context object: starting point for all DWARF-based
            # processing in pyelftools.
            dwarf = elf.get_dwarf_info()
        else:
            print('  file has no DWARF info')
            return

        for compile_unit in dwarf.iter_CUs():
            # cu_offset is a computed attribute; header fields such as
            # unit_length are reached through item lookup.
            location = (compile_unit.cu_offset, compile_unit['unit_length'])
            print('  Found a compile unit at offset %s, length %s' % location)

            # The unit is described by its first DIE.
            describing_die = compile_unit.get_top_DIE()
            print('    Top DIE with tag=%s' % describing_die.tag)

            # Report the file name.
            print('    name=%s' % describing_die.get_full_path())
Ejemplo n.º 8
0
class DwarfInfo:
    def __init__(self, exe):
        self.binary = exe
        self.fd = open(exe, 'rb')
        self.elf_file = ELFFile(self.fd)

        if not self.elf_file.has_dwarf_info():
            raise ('Binary contains no dwarf info section.')
        self.dwarf_info = self.elf_file.get_dwarf_info()

    def __del__(self):
        self.fd.close()

    def lookup(self, address):
        # iterate over the compile units(CUs)
        for CU in self.dwarf_info.iter_CUs():
            line_progs = self.dwarf_info.line_program_for_CU(CU)
            prev_state = None
            # debug-line parse the table like `dwarfdump -debug-line ./main`
            for entry in line_progs.get_entries():
                if entry.state is None:
                    continue
                if entry.state.end_sequence:
                    prev_state = None
                    continue
                if prev_state and prev_state.address <= address < entry.state.address:
                    file_name = line_progs['file_entry'][prev_state.file -
                                                         1].name
                    line = prev_state.line
                    return file_name, line
                prev_state = entry.state
        raise ('Could not find address')
Ejemplo n.º 9
0
def process_file(filename):
    """Print each compile unit of an ELF file and recurse through its DIEs.

    For every unit the offset, unit length, top-DIE tag and full path are
    printed, then ``die_info_rec`` is called on the top DIE. Prints a notice
    and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo is the starting point for DWARF processing in pyelftools
        # and yields the compile units of the .debug_info section.
        dwarf = elf.get_dwarf_info()

        for unit in dwarf.iter_CUs():
            # Header elements of a unit are read via item lookup.
            unit_offset = unit.cu_offset
            unit_length = unit['unit_length']
            print('  Found a compile unit at offset %s, length %s' % (
                unit_offset, unit_length))

            # The top DIE is the root of this unit's DIE tree.
            root = unit.get_top_DIE()
            print('    Top DIE with tag=%s' % root.tag)

            # The file name of the unit.
            print('    name=%s' % root.get_full_path())

            # Show the whole tree, starting from the root.
            die_info_rec(root)
Ejemplo n.º 10
0
def retrieve_pub_functions(binary):
    """Dump the .debug_pubnames table of an ELF file and return its names.

    Returns:
        The list of names found in the pubnames table, in iteration order.
        Returns -1 (after printing a message) when the file has no DWARF info
        or ``get_pubnames()`` returns None.
    """
    rule = '-' * 66
    with open(binary, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return -1

        # The DWARFInfo context object gives access to the pubnames table.
        pubnames = elf.get_dwarf_info().get_pubnames()
        if pubnames is None:
            print('ERROR: No .debug_pubnames section found in ELF.')
            return -1

        print('%d entries found in .debug_pubnames' % len(pubnames))

        # Table header, one row per entry, closing rule.
        print('Dumping .debug_pubnames table ...')
        print(rule)
        print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS'))
        print(rule)
        symbols = []
        for name, entry in pubnames.items():
            symbols.append(name)
            print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs))
        print(rule)

        print(symbols)

        return symbols
Ejemplo n.º 11
0
    def run(self):
        """Build the DWARF model step by step and report progress to the window.

        Parses ``self.f`` as an ELF file. Without DWARF info an error is
        scheduled on the window and the method returns. Otherwise the builder's
        step generator is advanced until it yields a truthy element; a progress
        update is scheduled for every falsy step. The method returns silently
        as soon as ``self.stop_requested`` is set; otherwise the final element
        is handed to ``window.done_loading``.
        """
        elf = ELFFile(self.f)

        if not elf.has_dwarf_info():
            GLib.idle_add(self.window.display_error, "This file has no DWARF info.")
            return

        builder = DwarfModelBuilder(elf.get_dwarf_info(), self.verbose)
        total = builder.num_cus()
        steps = builder.build_step()

        completed = 0
        while True:
            file_elem = next(steps)
            if file_elem:
                break
            if self.stop_requested:
                return

            GLib.idle_add(self.window.load_progress, float(completed) / total)
            completed += 1

        if self.stop_requested:
            return

        GLib.idle_add(self.window.done_loading, file_elem)
Ejemplo n.º 12
0
 def __init__(self, filename):
     """Store ``filename`` and parse the address ranges from its DWARF info.

     The file is opened as an ELF image; an assertion requires it to carry
     DWARF information before ``parseAddressRanges`` is invoked on it.
     """
     self.filename = filename
     with open(filename, 'rb') as stream:
         elf_image = ELFFile(stream)
         has_dwarf = elf_image.has_dwarf_info()
         assert has_dwarf, "No DWARF information for '{}'".format(filename)
         dwarf = elf_image.get_dwarf_info()
         self.parseAddressRanges(dwarf)
Ejemplo n.º 13
0
    def __init__(self, binary: str):
        """Load symbols, compile units and ``__metal_serial_`` markers from an ELF binary.

        Populates:
            self.symbols: one ``Symbol`` per named entry of the symbol table.
            self.compile_units: one ``CompileUnitInput`` per compile-unit DIE,
                with its name, compilation directory and file list.
            self.markers: one ``Marker`` per symbol whose name starts with
                ``__metal_serial_``, located through the line programs.

        Raises:
            Exception: if the binary has no DWARF info, or if no line-program
                entry matches the address of a ``__metal_serial_`` symbol.
        """
        # Start empty so the marker scan below still works when the binary has
        # no symbol table section at all.
        self.symbols = []

        with open(binary, "rb") as b:
            elffile = ELFFile(b)

            # Symbol table: keep every named symbol. If several symbol table
            # sections exist, the last one visited wins.
            for section in elffile.iter_sections():
                if isinstance(section, SymbolTableSection):
                    self.symbols = [Symbol(sym.name, sym['st_value'], sym['st_info']['type']) for sym in section.iter_symbols()
                                    if len(sym.name) > 0]

            if not elffile.has_dwarf_info():
                raise Exception("This tool needs gdb info.")

            dbg = elffile.get_dwarf_info()

            def file_entry_to_abs(file_entry, linep: LineProgram) -> str:
                """Join a file entry's name with its include directory ('.' for index 0)."""
                di = file_entry.dir_index
                if di > 0:
                    # dir_index is 1-based into the include_directory table.
                    return path.join(linep['include_directory'][di-1].decode(), file_entry.name.decode())
                else:
                    return path.join('.', file_entry.name.decode())

            # Pair every compile unit with its line program once; both the
            # compile-unit list and the marker scan reuse these pairs.
            cu_helper = [(cu, dbg.line_program_for_CU(cu)) for cu in dbg.iter_CUs()]

            self.compile_units = [
                CompileUnitInput(die.attributes['DW_AT_name'].value.decode(),
                                 die.attributes['DW_AT_comp_dir'].value.decode(),
                                 [file_entry_to_abs(fe, linep) for fe in linep['file_entry']])
                for cu, linep in cu_helper for die in cu.iter_DIEs() if die.tag == 'DW_TAG_compile_unit'
            ]

            # Locate the source position of every marker symbol.
            self.markers = []

            for msym in (sym for sym in self.symbols if sym.name.startswith('__metal_serial_')):
                try:
                    # First line-program entry whose address equals the symbol address.
                    nx : Tuple[LineProgramEntry, LineProgram] = next((entry, linep) for (cu, linep) in cu_helper for entry in linep.get_entries()
                               if entry.state is not None and entry.state.address == msym.address)
                    (loc, linep) = nx

                    # File numbers are used as 1-based indexes into file_entry.
                    abs_file_entry = file_entry_to_abs(linep['file_entry'][loc.state.file - 1], linep)

                    # check if marker already exists -
                    #for existing_marker in self.markers:
                    #    if loc.state.line == existing_marker.line and loc.state.column == existing_marker.column and existing_marker.file == abs_file_entry:
                    #        print(msym.name, existing_marker.name)
                    #        raise Exception("Duplicate code markers found at {}({})".format(existing_marker.file, existing_marker.line))

                    self.markers.append(Marker(
                        msym.name,
                        msym.address,
                        msym.symbol_type,
                        abs_file_entry,
                        loc.state.line,
                        loc.state.column
                    ))
                except StopIteration:
                    # The StopIteration carries no information; drop it from the traceback.
                    raise Exception('Could not find code location for {} at 0x{:x} - this is most likely due to missing gdb symbols.'.format(msym.name, msym.address)) from None
Ejemplo n.º 14
0
def process_file(filename, root, pkgroot):
    """Collect the line-entry mapping of every line program in an ELF file.

    Returns:
        A set updated with ``line_entry_mapping(line_program)`` for each
        compile unit that has a line program. The set is empty for
        directories, unreadable paths, files without DWARF info, and when an
        OSError occurs. If the file cannot be parsed as ELF, the result of
        ``find_in_source_root(filename, root, pkgroot)`` is returned instead.
    """
    collected = set()
    if os.path.isdir(filename) or not os.access(filename, os.R_OK):
        return collected
    try:
        with open(filename, 'rb') as stream:
            try:
                elf = ELFFile(stream)
                if not elf.has_dwarf_info():
                    return collected

                dwarf = elf.get_dwarf_info()
                for unit in dwarf.iter_CUs():
                    # A compile unit may or may not have a matching line
                    # program in .debug_line.
                    program = dwarf.line_program_for_CU(unit)
                    if program is not None:
                        collected.update(line_entry_mapping(program))
            except ELFError:
                # Not a parsable ELF: fall back to the source-root lookup,
                # still under the OSError guard below.
                return find_in_source_root(filename, root, pkgroot)
    except OSError:
        pass
    return collected
Ejemplo n.º 15
0
    def generate_header(self, data_out_filename, glob_data_out, namesp_out):
        """Write header output for every compile unit of the object files.

        A "generated by" banner is written to both output streams, plus a
        linker-set include to ``glob_data_out``. Each object file in
        ``self._objfiles`` is then parsed as ELF and every compile unit is
        delegated to a ``HeaderGenCU`` instance.

        Raises:
            NoDwarfInfoError: if an object file carries no DWARF info.
        """
        banner = "/* generated by userspace-header-gen.py */\n"
        glob_data_out.write(banner)
        glob_data_out.write("#include <rtems/linkersets.h>\n")

        namesp_out.write(banner)

        for objfile in self._objfiles:
            elf = ELFFile(objfile)
            if not elf.has_dwarf_info():
                raise NoDwarfInfoError()

            # DWARF sections are deliberately not relocated: it is not needed
            # here and causes problems on ARM with pyelftools 0.24.
            dwarf = elf.get_dwarf_info(relocate_dwarf_sections=False)

            for unit in dwarf.iter_CUs():
                if self._verbose >= VERBOSE_SOME:
                    unit_info = (unit.cu_offset, unit['unit_length'])
                    self._err.write(
                        'Found a CU at offset %s, length %s\n' % unit_info)

                line_program = dwarf.line_program_for_CU(unit)
                unit_generator = HeaderGenCU(unit, self._progname,
                                             line_program, self._err,
                                             self._verbose, self._filterre)
                unit_generator.generate_header(data_out_filename,
                                               glob_data_out, namesp_out)
Ejemplo n.º 16
0
def get_all_offsets_from_ELF(filename, structs):
    """Yield ``(struct, field, offset)`` for each requested struct in an ELF file.

    Every entry of ``structs`` is validated with ``_validate_struct`` and
    looked up in the file's DWARF info through ``get_items_from_DWARF``; the
    fields and offsets come from ``get_offsets_from_DIE``.

    This is a generator function: nothing in the body runs, and the file is
    not opened, until the result is iterated. The file stays open while
    iteration is in progress.
    """
    # Validate the arguments before the file is parsed, so a bad argument is
    # reported without waiting for the parse.
    # NOTE: because this function is a generator, the validation still only
    # happens when iteration starts, not when the function is called.
    names = []
    for struct in structs:
        kind, name = _validate_struct(struct)
        # Stored as (KIND2TAG value, ASCII-encoded name); these pairs are the
        # lookup keys used further down.
        names.append((KIND2TAG[kind], name.encode('ascii')))

    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        dwarf = elffile.get_dwarf_info()
        items = get_items_from_DWARF(dwarf, names=set(names))
        # Compile units that contain at least one requested item.
        cus = {cu for cu, item in items.values()}
        # Per compile unit: DIE offset -> DIE, used to follow type references.
        cu2offset2die = {
            cu: {die.offset: die
                 for die in cu.iter_DIEs()}
            for cu in cus
        }
        # `kind` here is the KIND2TAG value stored in `names`, not the kind
        # returned by _validate_struct.
        for struct, (kind, value) in zip(structs, names):
            cu, item = items[kind, value]
            offset2die = cu2offset2die[cu]
            # NOTE(review): this compares the KIND2TAG value with 'typedef';
            # confirm 'typedef' is actually one of the KIND2TAG values.
            if kind == 'typedef':
                # Follow the typedef to the DIE it refers to.
                # NOTE(review): assumes the DW_AT_type value uses the same
                # offset base as die.offset -- TODO confirm.
                item = offset2die[item.attributes['DW_AT_type'].value]
            for field, offset in get_offsets_from_DIE(item, offset2die):
                yield struct, field, offset
Ejemplo n.º 17
0
class struct_parser(object):
    """Finds structure DIEs by name in the DWARF info of an ELF file."""

    def __init__(self, path):
        """Parse ``path`` and keep its DWARF info in ``self.dw``.

        When the file has no debug information a message is printed and
        ``self.dw`` is set to 0; the finder methods then return None.
        """
        with open(path, 'rb') as f:
            self.elffile = ELFFile(f)

            # check if we have debug info
            if not has_debug_info(self.elffile):
                print("File %s not have debug information!" % path)
                self.dw = 0
            else:
                self.dw = self.elffile.get_dwarf_info()

    def _find_struct(self, sname, fname=None):
        """Return ``(die, cu)`` of the first matching structure DIE, or None.

        Only direct children of each compile unit's top DIE are examined.
        ``sname`` and ``fname`` are bytes; when ``fname`` is given, only
        compile units whose DW_AT_name equals it are searched.
        """
        if not self.dw:
            # No debug information was loaded (self.dw == 0).
            return None
        for cu in self.dw.iter_CUs():
            top_die = cu.get_top_DIE()
            if fname is not None:
                cu_name = top_die.attributes.get("DW_AT_name")
                # Units without a name can never match a requested file name.
                if cu_name is None or cu_name.value != fname:
                    continue
            for die in top_die.iter_children():
                if die.tag != "DW_TAG_structure_type":
                    continue
                name_attr = die.attributes.get("DW_AT_name")
                if name_attr is not None and name_attr.value == sname:
                    return die, cu
        return None

    def find_struct_die(self, sname):
        """Return ``(die, cu)`` for the structure named ``sname``, or None."""
        return self._find_struct(bytes(sname, encoding="UTF-8"))

    def find_struct_die_with_filename(self, fname, sname):
        """Return ``(die, cu)`` for structure ``sname`` in the unit named ``fname``, or None."""
        return self._find_struct(bytes(sname, encoding="UTF-8"),
                                 bytes(fname, encoding="UTF-8"))
Ejemplo n.º 18
0
def get_frame_base(filename, pc, rebased_addr):
    """
    Look up the CFA offset that applies to a function.

    Scans the decoded tables of all FDEs among the EH CFI entries and picks
    the row with the smallest ``pc`` that is greater than or equal to
    ``pc - rebased_addr``.

    :param filename: name of the executable file
    :param pc: The address of the beginning of the function
    :param rebased_addr: Should be project.loader.memory.min_addr
    :return: the ``cfa`` offset of the selected row, 0 when no row qualifies,
             or None when the file has no DWARF info
    """
    target_loc = pc - rebased_addr
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo context object: starting point for DWARF processing.
        dwarfinfo = elf.get_dwarf_info()

        # Needed by the descriptions module to decode register names found in
        # DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        # Upper bound for the search; any row pc below it replaces it.
        best_pc = 1000000000000000000000
        best_offset = 0
        fdes = (cfi for cfi in dwarfinfo.EH_CFI_entries()
                if isinstance(cfi, FDE))
        for fde in fdes:
            for row in fde.get_decoded().table:
                row_pc = row['pc']
                if target_loc <= row_pc < best_pc:
                    best_offset = row['cfa'].offset
                    best_pc = row_pc
        return best_offset
Ejemplo n.º 19
0
    def get_subprograms(self):
        """
        Generate the subprograms according to the DWARF debugging information.

        Every DW_TAG_subprogram DIE that has DW_AT_low_pc, DW_AT_high_pc and
        either DW_AT_name or DW_AT_specification yields a ``Function`` that is
        appended to ``self.subprograms``; its ``low_pc`` is appended to
        ``self.func_starts``.
        """
        # Context manager so the file is closed once all DIEs have been read.
        with open(self.path, 'rb') as f:
            dwarfinfo = ELFFile(f).get_dwarf_info()
            for CU in dwarfinfo.iter_CUs():
                for DIE in CU.iter_DIEs():
                    if DIE.tag != "DW_TAG_subprogram":
                        continue
                    attrs = DIE.attributes
                    if "DW_AT_low_pc" not in attrs or "DW_AT_high_pc" not in attrs:
                        continue
                    if "DW_AT_name" not in attrs and "DW_AT_specification" not in attrs:
                        continue

                    lowpc = attrs["DW_AT_low_pc"].value
                    high_attr = attrs["DW_AT_high_pc"]
                    highpc = high_attr.value
                    # With this form the value is an offset from low_pc rather
                    # than an absolute address.
                    # NOTE(review): other constant forms are not handled here
                    # -- confirm whether they can occur for this attribute.
                    if high_attr.form == "DW_FORM_data4":
                        highpc = lowpc + highpc

                    # Name, return value and argument count are placeholders.
                    name = ""
                    ret = 0
                    num_args = 0
                    subprogram = Function(name, lowpc, highpc, "", "", ret,
                                          num_args, self)

                    self.subprograms.append(subprogram)
                    self.func_starts.append(subprogram.low_pc)
Ejemplo n.º 20
0
def load_dwarf_info(mmap):
    """ Load or reload all dwarf info from mmap.

    Iterates the file names in ``mmap``, skipping names that start with "["
    and files without DWARF info. For every remaining file, an entry
    ``{"dwarfinfo": ..., "units": [...]}`` is stored in the module-level
    ``all_dwarf_info`` dict; each unit holds the line program, its
    non-end-of-sequence states, their addresses and the subprogram DIEs.
    """
    for filename in mmap:
        if filename.startswith("["):
            continue
        stream = open(filename, "rb")
        try:
            elffile = ELFFile(stream)
            has_dwarf = elffile.has_dwarf_info()
        except Exception:
            # Parsing failed: do not leak the handle.
            stream.close()
            raise
        if not has_dwarf:
            # Nothing from this file is kept, so release the handle now.
            stream.close()
            continue

        # The handle is left open from here on because the DWARF objects
        # stored below outlive this call and may still read from it
        # (presumably lazily -- TODO confirm).
        dwarfinfo = elffile.get_dwarf_info()
        # Information from Compilation Units (CUs)
        cus = []
        for cu in dwarfinfo.iter_CUs():
            lineprog = dwarfinfo.line_program_for_CU(cu)
            states = [
                entry.state for entry in lineprog.get_entries()
                if entry.state and not entry.state.end_sequence
            ]
            addresses = [state.address for state in states]
            # NOTE(review): a subprogram DIE without DW_AT_name raises
            # KeyError here -- confirm such DIEs cannot occur in the inputs.
            dies = [{
                "entry": die,
                "bounds": die_bounds(die),
                "name": die.attributes['DW_AT_name'].value
            } for die in cu.iter_DIEs() if die.tag == 'DW_TAG_subprogram']
            cus.append({
                "lineprog": lineprog,
                "states": states,
                "addresses": addresses,
                "entries": dies
            })
        all_dwarf_info[filename] = {"dwarfinfo": dwarfinfo, "units": cus}
Ejemplo n.º 21
0
 def test_range_list_absence(self):
     """The ARM test file has DWARF info and ``range_lists()`` returns None for it."""
     elf_path = os.path.join('test', 'testfiles_for_unittests',
                             'arm_with_form_indirect.elf')
     with open(elf_path, 'rb') as stream:
         elf = ELFFile(stream)
         self.assertTrue(elf.has_dwarf_info())
         range_lists = elf.get_dwarf_info().range_lists()
         self.assertIsNone(range_lists)
Ejemplo n.º 22
0
def get_cfi(path):
    ''' Return the CFI entries of the ELF file at ``path``.

    ``CFI_entries()`` is used when ``has_CFI()`` is true, otherwise
    ``EH_CFI_entries()`` when ``has_EH_CFI()`` is true. Returns None, after
    printing a short message, when the file has no DWARF info, has no CFI, or
    when an ELFError, DWARFError, PermissionError or KeyError is raised.
    '''
    try:
        with open(path, 'rb') as stream:
            elf = ELFFile(stream)

            if not elf.has_dwarf_info():
                print("No DWARF")
                return None

            dwarf = elf.get_dwarf_info()
            if dwarf.has_CFI():
                return dwarf.CFI_entries()
            if dwarf.has_EH_CFI():
                return dwarf.EH_CFI_entries()

            print("No CFI")
            return None
    except ELFError:
        print("ELF Error")
    except DWARFError:
        print("DWARF Error")
    except PermissionError:
        print("Permission Error")
    except KeyError:
        print("Key Error")

    # Reached only after one of the handled errors above.
    return None
    def run(self):
        """Load the DWARF model incrementally, posting progress to the window.

        ``self.f`` is parsed as ELF; if it has no DWARF info an error message
        is scheduled on the window. Otherwise the builder's step generator is
        driven until it produces a truthy element, scheduling a progress
        update after each falsy step. Setting ``self.stop_requested`` makes
        the method return without notifying the window; otherwise the final
        element is passed to ``window.done_loading``.
        """
        elf = ELFFile(self.f)

        if not elf.has_dwarf_info():
            GLib.idle_add(self.window.display_error,
                          "This file has no DWARF info.")
            return

        builder = DwarfModelBuilder(elf.get_dwarf_info(), self.verbose)
        total = builder.num_cus()
        step_iter = builder.build_step()

        finished_steps = 0
        while True:
            file_elem = next(step_iter)
            if file_elem:
                break
            if self.stop_requested:
                return

            fraction = float(finished_steps) / total
            GLib.idle_add(self.window.load_progress, fraction)
            finished_steps += 1

        if self.stop_requested:
            return

        GLib.idle_add(self.window.done_loading, file_elem)
def translate_callstacks( callstack_to_count, executable_path, address_to_translation=None ):
    """Translate the addresses in each callstack using DWARF info.

    All elements of a callstack except the last are passed through
    ``decode_address``; successful translations are decoded as ASCII and
    cached in ``address_to_translation``. Addresses that do not translate are
    dropped. The last element of the callstack is kept as-is and ends up as
    the last element of the translated tuple, preceded by the translated
    names in reverse order.

    Returns:
        ``(translated_callstack_to_count, address_to_translation)``: the
        counts keyed by translated callstack tuple, and the (possibly newly
        created) address cache.
    """
    translated_callstack_to_count = {}
    with open( executable_path, "rb" ) as executable_infile:
        # Load the executable and fetch its debug info
        dwarf_info = ELFFile( executable_infile ).get_dwarf_info()
        if address_to_translation is None:
            address_to_translation = {}
        for callstack, count in callstack_to_count.items():
            resolved = []
            for address in callstack[:-1]:
                if address in address_to_translation:
                    # Already translated earlier: reuse the cached name
                    func_name = address_to_translation[ address ]
                else:
                    func_name = decode_address( dwarf_info, address )
                    if func_name is not None:
                        func_name = str( func_name, encoding="ascii" )
                        address_to_translation[ address ] = func_name
                # Untranslatable addresses are left out of the result
                if func_name is not None:
                    resolved.append( func_name )

            # Reverse the translated names and put the untranslated final
            # element last; a tuple so the result can be used as a dict key
            stack_key = tuple( resolved[::-1] ) + ( callstack[-1], )
            translated_callstack_to_count[ stack_key ] = count
    return translated_callstack_to_count, address_to_translation
Ejemplo n.º 25
0
 def test_range_list_presence(self):
     """The 64-bit sample executable has DWARF info and ``range_lists()`` is not None."""
     elf_path = os.path.join('test', 'testfiles_for_unittests',
                             'sample_exe64.elf')
     with open(elf_path, 'rb') as stream:
         elf = ELFFile(stream)
         self.assertTrue(elf.has_dwarf_info())
         range_lists = elf.get_dwarf_info().range_lists()
         self.assertIsNotNone(range_lists)
Ejemplo n.º 26
0
def process_file(filename):
    """Print offset, length, top-DIE tag and DW_AT_name of each compile unit.

    Prints a notice and returns early when the ELF file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo is the starting point for DWARF processing in pyelftools;
        # it iterates the compile units of the .debug_info section.
        for unit in elf.get_dwarf_info().iter_CUs():
            # cu_offset is a computed attribute; header elements such as
            # unit_length are reached via item lookup.
            unit_summary = (unit.cu_offset, unit['unit_length'])
            print('  Found a compile unit at offset %s, length %s' % unit_summary)

            # The first DIE in each compile unit describes it.
            root = unit.get_top_DIE()
            print('    Top DIE with tag=%s' % root.tag)

            # A DIE maps attribute names to AttributeValue objects (see
            # elftools/dwarf/die.py). The DW_AT_name value is handed to
            # bytes2str before printing.
            unit_name = root.attributes['DW_AT_name'].value
            print('    name=%s' % bytes2str(unit_name))
Ejemplo n.º 27
0
 def test(self, file):
     """Tell whether ``file`` holds DWARF debugging data.

     Returns the falsy result of ``has_dwarf_info()`` when there is no DWARF
     info, otherwise the ``has_debug_info`` attribute of the DWARF info.
     Returns False when ``file`` cannot be parsed as ELF.
     """
     try:
         elf_file = ELFFile(file)
         dwarf_present = elf_file.has_dwarf_info()
         if not dwarf_present:
             return dwarf_present
         return elf_file.get_dwarf_info().has_debug_info
     except ELFError:
         return False
Ejemplo n.º 28
0
def _get_dwarf_info(binary_path, relocate_dwarf_sections=True):
    """Load and return the DWARF info object of the ELF file at `binary_path`.

    binary_path:
        path of the ELF file; opened in binary mode and closed again before
        the function returns.
    relocate_dwarf_sections:
        forwarded unchanged to ``ELFFile.get_dwarf_info``.

    Raises ``RuntimeError`` when the file reports no DWARF info.
    """
    with open(binary_path, 'rb') as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            return elf.get_dwarf_info(
                relocate_dwarf_sections=relocate_dwarf_sections)
        raise RuntimeError(f'{binary_path} has no DWARF info')
Ejemplo n.º 29
0
def process_file(filename):
    """Print the location information of every DIE attribute in an ELF file.

    For each compile unit a header line is printed; then, for every attribute
    that the location parser recognises as carrying a location, the owning
    DIE's name, tag and attribute name are printed followed by the decoded
    location (a single expression or a location list).

    filename:
        path of the ELF file; it is opened in binary mode.

    Prints a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            print('  Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))

            # The DWARF version is a per-CU header field; read it once.
            version = CU['version']

            for DIE in CU.iter_DIEs():
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(attr, version):
                        continue

                    # Not every DIE that carries a location also carries a
                    # DW_AT_name, so look the name up defensively instead of
                    # failing with KeyError.
                    name_attr = DIE.attributes.get('DW_AT_name')
                    if name_attr is None:
                        var_name = '<unnamed>'
                    else:
                        var_name = name_attr.value
                    print(' Varname:%s' % (var_name))
                    print('   DIE %s. attr %s.' % (DIE.tag, attr.name))

                    loc = loc_parser.parse_from_attribute(attr, version)
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print('      %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo,
                                           indent='      '))
Ejemplo n.º 30
0
    def test_empty_pubtypes(self):
        """Check that an empty ".debug_pubtypes" section yields no entries.

        The fixture's section holds only the zero terminator entry; handling
        of that case used to be buggy and raised `ELFParseError`.
        """
        fixture = os.path.join(
            'test', 'testfiles_for_unittests', 'empty_pubtypes', 'main')
        with open(fixture, 'rb') as stream:
            pubtypes = ELFFile(stream).get_dwarf_info().get_pubtypes()
            self.assertEqual(len(pubtypes), 0)
Ejemplo n.º 31
0
    def test_empty_pubtypes(self):
        """Check that an empty ".debug_pubtypes" section yields no entries.

        The fixture's section holds only the zero terminator entry; handling
        of that case used to be buggy and raised `ELFParseError`.
        """
        fixture = os.path.join(
            'test', 'testfiles_for_unittests', 'empty_pubtypes', 'main')
        with open(fixture, 'rb') as stream:
            pubtypes = ELFFile(stream).get_dwarf_info().get_pubtypes()
            self.assertEqual(len(pubtypes), 0)
Ejemplo n.º 32
0
    def __init__(self, stream: TraceStream, elf_file: io.IOBase, basedir: str=''):
        """Store the trace stream and load DWARF info from `elf_file`.

        stream:
            kept as ``self.stream``.
        elf_file:
            open binary stream handed to ``ELFFile``.
        basedir:
            kept as ``self.basedir``.

        Raises ``ValueError`` when the ELF file has no DWARF info.
        """
        self.stream, self.basedir = stream, basedir
        self.cache = dict()

        elf = ELFFile(elf_file)
        if elf.has_dwarf_info():
            self.dwarf = elf.get_dwarf_info()
            return

        raise ValueError(f'ELF file {elf} has no DWARF info')
Ejemplo n.º 33
0
    def test_dwarfv5_parses(self):
        """Smoke-test that the DWARFv5 fixture is detected and loads."""
        fixture = os.path.join(
            'test', 'testfiles_for_unittests', 'dwarfv5_basic.elf')
        with open(fixture, 'rb') as stream:
            parsed = ELFFile(stream)

            # The debugging information must be detected ...
            self.assertTrue(parsed.has_dwarf_info())

            # ... and fetching it must hand back an object rather than None.
            self.assertIsNotNone(parsed.get_dwarf_info())
Ejemplo n.º 34
0
 def test_die_size(self):
     """Every child of each top DIE in the fixture must report size 3."""
     fixture = os.path.join('test', 'testfiles_for_unittests',
                            'trailing_null_dies.elf')
     with open(fixture, 'rb') as stream:
         parsed = ELFFile(stream)
         self.assertTrue(parsed.has_dwarf_info())

         for unit in parsed.get_dwarf_info().iter_CUs():
             root = unit.get_top_DIE()
             for entry in root.iter_children():
                 self.assertEqual(entry.size, 3)
Ejemplo n.º 35
0
def get_type_from_file(filename, name):
    """Resolve the type of the DIE called `name` in the file's first CU.

    filename:
        path of the ELF file (converted with ``str`` before opening).
    name:
        text name to look for; compared against ``DW_AT_name`` after UTF-8
        encoding.

    Returns whatever ``get_die_type`` produces for the first matching DIE.
    Raises ``IndexError`` when the file has no compile unit or no DIE of the
    first compile unit carries that name.
    """
    with open(str(filename), 'rb') as stream:
        units = list(ELFFile(stream).get_dwarf_info().iter_CUs())
        first_cu = units[0]

        wanted = name.encode('utf-8')
        matches = []
        for entry in first_cu.iter_DIEs():
            attrs = entry.attributes
            if 'DW_AT_name' in attrs and attrs['DW_AT_name'].value == wanted:
                matches.append(entry)

        return get_die_type(filename, first_cu, matches[0])
Ejemplo n.º 36
0
def fetch_lineno(bin_name, func_addrs):
    """Map addresses to source lines using the DWARF info of `bin_name`.

    bin_name:
        path of the ELF file; opened in binary mode.
    func_addrs:
        passed through unchanged to ``decode_file_line``.

    Returns the result of ``decode_file_line``; when the file has no DWARF
    info a message is printed and an empty dict is returned.
    """
    with open(bin_name, "rb") as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            return decode_file_line(elf.get_dwarf_info(), func_addrs)
        print("No Dwarf Found in ", bin_name)

    return {}
Ejemplo n.º 37
0
def get_producer(path):
    """Return the ``DW_AT_producer`` string of the first usable compile unit.

    path:
        path of the ELF (debug) file; opened in binary mode.

    Compile units are visited in order. A unit is skipped when its top DIE
    has no ``DW_AT_producer`` or the attribute uses a form other than the two
    handled below:

    * ``DW_FORM_strp`` -- the attribute value is returned directly.
    * ``DW_FORM_GNU_strp_alt`` -- the value is an offset into the
      ``.debug_str`` of an alternate (dwz) file. The file name is read from
      the ``.gnu_debugaltlink`` section, rebuilt below ``/usr/lib/debug/``
      from its ``.dwz/`` component onwards, and handed to ``get_dwz``.

    Returns ``None`` when no compile unit yields a producer.
    """
    with open(path, "rb") as f:
        elffile = ELFFile(f)
        dwarfinfo = elffile.get_dwarf_info()

        for CU in dwarfinfo.iter_CUs():
            # Start with the top DIE, the root for this CU's DIE tree
            top_DIE = CU.get_top_DIE()
            attrs = top_DIE.attributes.get('DW_AT_producer')
            if attrs is None:
                continue

            if attrs.form == 'DW_FORM_strp':  # lucky ;)
                return attrs.value
            if attrs.form != 'DW_FORM_GNU_strp_alt':
                # Unsupported form: try the next compile unit.
                continue

            # DW_FORM_GNU_strp_alt is like DW_FORM_strp, but it refers to a
            # string in the .dwz file, not in the main file. See
            # https://fedorahosted.org/elfutils/wiki/DwarfExtensions
            #
            # The .gnu_debugaltlink section contains a file name followed by
            # the build-id of the dwz file.
            try:
                for section in elffile.iter_sections():
                    if bytes2str(section.name) != ".gnu_debugaltlink":
                        continue
                    # Keep only the NUL-terminated file name.
                    link = section.data().partition(b"\x00")[0]
                    # The section data is bytes, so the needle must be too.
                    i = link.find(b".dwz/")
                    if i < 0:
                        continue
                    rpath = os.path.join("/usr/lib/debug/",
                                         link[i:].decode("utf-8"))
                    # offset in alternate (.dwz/...)'s .debug_str
                    return get_dwz(rpath, offset=attrs.value)
            except Exception:
                # Resolving the alternate file is best effort; on failure
                # fall through to the next compile unit.
                continue
Ejemplo n.º 38
0
class LocateUndef(object):
    """ Locate symbols, by name, in the DWARF info of an object file. Each
        symbol found is stored as a (compile unit, DIE) pair.

        Public methods:

            findDies -- for a list of symbol names, collect the matching DIEs

            getDies -- return the DIEs collected so far
    """

    def __init__(self,fname):
        """ fname:
                file name of object file

            The file is opened in binary mode and stays open until the object
            is destroyed. Errors from opening or parsing the file propagate;
            on a parsing error the file is closed first.
        """
        self.dies = {}
        # Defined up front so that findDies and __del__ never hit a missing
        # attribute, even when the file has no DWARF info or loading fails.
        self.dwarfinfo = None
        self.fh = None

        self.fh = open( fname, 'rb' )
        try:
            self.elffile = ELFFile(self.fh)
            if self.elffile.has_dwarf_info():
                self.dwarfinfo = self.elffile.get_dwarf_info()
        except Exception:
            self.fh.close()
            self.fh = None
            raise

    def __del__(self):
        # getattr: __init__ may have failed before the attribute existed.
        fh = getattr(self, 'fh', None)
        if fh:
            fh.close()

    def findDies( self, namesList ):
        """ namesList -- list of symbol names (strings). Each symbol is
            expected to appear once in the list.

            Every name that is found is REMOVED from namesList, so after the
            call the list holds the names that were not found. The search
            stops as soon as the list is empty. Does nothing when the file
            has no DWARF info."""
        if self.dwarfinfo is None or not namesList:
            return

        for cu in self.dwarfinfo.iter_CUs():
            for die in cu.iter_DIEs():
                if die.is_null():
                    continue

                name_attr = die.attributes.get('DW_AT_name')
                if name_attr is None:
                    continue

                # 'replace' keeps the scan going on names that are not valid
                # UTF-8; plain ASCII names decode exactly as before.
                name = name_attr.value.decode('utf-8', 'replace')
                if name in namesList:
                    self.dies[name] = (cu,die)
                    namesList.remove(name)
                    if not namesList:
                        return

    def getDies(self):
        """ returns the dict mapping each found symbol name to its
            (compile unit, DIE) pair """
        return self.dies
Ejemplo n.º 39
0
    def read(self, view):
        """Report every sized structure type in ``self.file`` to `view`.

        view:
            receiver object; ``view.add(name, "Struct", size, members)`` is
            called once per ``DW_TAG_structure_type`` DIE that has a
            ``DW_AT_byte_size``.

        ``name`` is the DIE's ``DW_AT_name``, or "<decl_file>:<decl_line>"
        for unnamed structures. ``members`` lists the names of the direct
        ``DW_TAG_member`` children; members without a name are reported as
        "<anonymous>".

        Prints a notice and returns when the file has no DWARF info.
        """
        self.log.info('Reading file %s', self.file)
        with open(self.file, "rb") as f:
            elffile = ELFFile(f)
            if not elffile.has_dwarf_info():
                print('  file has no DWARF info')
                return

            # get_dwarf_info returns a DWARFInfo context object, which is the
            # starting point for all DWARF-based processing in pyelftools.
            dwarfinfo = elffile.get_dwarf_info()

            for CU in dwarfinfo.iter_CUs():
                print('  Found a compile unit at offset %s, length %s' % (
                    CU.cu_offset, CU['unit_length']))

                for die in CU.iter_DIEs():
                    if die.tag != 'DW_TAG_structure_type':
                        continue

                    attrs = die.attributes
                    # Structures without a size are skipped. Check this first
                    # so they never reach the name fallback below.
                    if 'DW_AT_byte_size' not in attrs:
                        continue
                    size = attrs['DW_AT_byte_size'].value

                    if 'DW_AT_name' in attrs:
                        name = attrs['DW_AT_name'].value.decode()
                    else:
                        # Unnamed structure: identify it by where it was
                        # declared, tolerating missing declaration info.
                        where = [
                            attrs[key].value if key in attrs else '?'
                            for key in ('DW_AT_decl_file', 'DW_AT_decl_line')
                        ]
                        name = "{}:{}".format(*where)

                    members = []
                    if die.has_children:
                        for child in die.iter_children():
                            if child.tag != 'DW_TAG_member':
                                continue
                            member_name = child.attributes.get('DW_AT_name')
                            if member_name is None:
                                # A member DIE without DW_AT_name must not
                                # abort the whole scan with KeyError.
                                members.append('<anonymous>')
                            else:
                                members.append(member_name.value.decode())

                    view.add(name, "Struct", size, members)
Ejemplo n.º 40
0
def process_dwarf_info(in_file, out_file):
  '''
    Main function processing the dwarf information from debug sections.

    in_file  -- path of the ELF file to read (opened in binary mode)
    out_file -- path the serialized CFG_pb2.Module is written to

    Types and frames are collected into TYPES_MAP and EH_FRAMES, every
    compile unit is decoded into the module and GLOBAL_VARIABLES, and each
    global variable with a positive size is added to the module before it is
    serialized.

    Returns False when in_file has no DWARF info; otherwise returns None.
  '''
  DEBUG('Processing file: {0}'.format(in_file))

  with open(in_file, 'rb') as f:
    f_elf = ELFFile(f)
    if not f_elf.has_dwarf_info():
      # Report the input path; the old code formatted the builtin `file`.
      DEBUG("{0} has no debug informations!".format(in_file))
      return False

    M = CFG_pb2.Module()
    M.name = "GlobalVariable"

    set_global_machine_arch(f_elf.get_machine_arch())
    dwarf_info = f_elf.get_dwarf_info()
    process_types(dwarf_info, TYPES_MAP)
    process_frames(dwarf_info, EH_FRAMES)
    section_offset = dwarf_info.debug_info_sec.global_offset

    # Iterate through all the compile units
    for CU in dwarf_info.iter_CUs():
      DEBUG('Found a compile unit at offset {0}, length {1}'.format(CU.cu_offset, CU['unit_length']))
      top_DIE = CU.get_top_DIE()
      c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset, section_offset)
      c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

    # .values() works on both Python 2 and 3 (iteritems is Python 2 only).
    for value in GLOBAL_VARIABLES.values():
      if value["size"] > 0:
        gvar = M.global_vars.add()
        gvar.name = value["name"]
        gvar.ea = value["addr"]
        gvar.size = value["size"]
      else:
        DEBUG("Look for {}".format(pprint.pformat(value)))

    # SerializeToString produces binary data, so write in binary mode.
    with open(out_file, "wb") as outf:
      outf.write(M.SerializeToString())

  DEBUG("Global Vars\n")
  DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
  DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
  DEBUG("End Global Vars\n")
Ejemplo n.º 41
0
def process_file(filename):
    """Print the ELF class of a file and the address size of each of its CUs.

    filename:
        path of the ELF file; opened in binary mode.

    Files without DWARF info only get the ELF class line.
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        # elfclass is read from the ELF header and exposed as an attribute.
        print('%s: elfclass is %s' % (filename, elf.elfclass))

        if not elf.has_dwarf_info():
            return

        for unit in elf.get_dwarf_info().iter_CUs():
            # cu_offset is an attribute of the unit, address_size a field of
            # its header.
            offset, addr_size = unit.cu_offset, unit['address_size']
            print('  CU at offset 0x%x. address_size is %s' % (
                offset, addr_size))
Ejemplo n.º 42
0
    def test_DWARF_indirect_forms(self):
        """Sanity-check parsing of the DW_FORM_indirect fixture.

        The fixture is an ARM ELF with non-trivial DWARF info that uses a lot
        of DW_FORM_indirect; the test only verifies that it parses and that
        the expected number of CUs is extracted.
        """
        fixture = os.path.join('test', 'testfiles_for_unittests',
                               'arm_with_form_indirect.elf')
        with open(fixture, 'rb') as stream:
            parsed = ELFFile(stream)
            self.assertTrue(parsed.has_dwarf_info())

            unit_count = len(list(parsed.get_dwarf_info().iter_CUs()))
            self.assertEqual(unit_count, 9)
Ejemplo n.º 43
0
def process_file(filename):
    """Print each compile unit's source path and dump its DIE tree.

    filename:
        path of the ELF file; opened in binary mode.

    The path printed for a unit is ``DW_AT_comp_dir`` joined with
    ``DW_AT_name``; when one of the two is missing the other is used alone.
    A top DIE lacking both raises ``KeyError``. The DIE tree is printed by
    ``die_info_rec``.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        for unit in elf.get_dwarf_info().iter_CUs():
            # Header fields of a compile unit are read by item lookup.
            print('  Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            # The top DIE is the root of this unit's DIE tree.
            root = unit.get_top_DIE()
            print('    Top DIE with tag=%s' % root.tag)

            attrs = root.attributes
            if 'DW_AT_comp_dir' in attrs:
                comp_dir = bytes2str(attrs['DW_AT_comp_dir'].value)
                if 'DW_AT_name' in attrs:
                    name = os.path.join(
                        comp_dir, bytes2str(attrs['DW_AT_name'].value))
                else:
                    name = comp_dir
            else:
                # No compilation directory: the name alone has to do.
                name = bytes2str(attrs['DW_AT_name'].value)
            print('    name=%s' % name)

            # Display DIEs recursively starting with the top DIE
            die_info_rec(root)
Ejemplo n.º 44
0
def generate_docs_from_stream(filename, fd):
	"""Build one document per compile unit of the ELF stream `fd`.

	filename:
		only used in the message printed when there is no DWARF info.
	fd:
		open binary stream handed to ``ELFFile``.

	Returns ``sequence(...)`` over the ``mdoc`` of each (CU, top DIE) pair,
	or an empty list when the stream has no DWARF info.
	"""
	elffile = ELFFile(fd)

	if elffile.has_dwarf_info():
		units = elffile.get_dwarf_info().iter_CUs()
		per_unit = [mdoc((unit, unit.get_top_DIE())) for unit in units]
		return sequence(per_unit)

	print('    ' + filename + ' has no DWARF info')
	return []
Ejemplo n.º 45
0
    def extract(self, binary):
        """Collect prototypes from every DIE of the ELF file `binary`.

        binary:
            path of the ELF file; opened in binary mode.

        Returns the dict filled by ``self.__extract_DIE``; it is empty when
        the file has no DWARF info (a notice is printed in that case).
        """
        protos = dict()
        with open(binary, 'rb') as stream:
            elf = ELFFile(stream)

            if elf.has_dwarf_info():
                for unit in elf.get_dwarf_info().iter_CUs():
                    for entry in unit.iter_DIEs():
                        self.__extract_DIE(unit, entry, protos)
            else:
                print('    File has no debug info (DWARF format expected) !')

        return protos
Ejemplo n.º 46
0
class SharedObjectInfo():
    """Address range and lazily loaded DWARF info of one ELF object.

    Attributes set by the constructor:
        path      -- path of the ELF file
        elf_file  -- ``ELFFile`` parsed from ``path``
        low_addr  -- the base address passed in
        high_addr -- ``low_addr`` plus the summed, alignment-rounded size of
                     all ``PT_LOAD`` segments
        is_pic    -- True when the ELF header's ``e_type`` is ``ET_DYN``
    """

    def __init__(self, path, baddr):
        """Parse the ELF file at `path`, assumed to be loaded at `baddr`.

        The process exits with status -1 when the file cannot be opened.
        """
        self.path = path
        self._set_elf_file()

        self.low_addr = baddr
        self.high_addr = baddr + self._get_mem_size()

        # Check whether the ELF file is position independent code
        self.is_pic = self.elf_file.header['e_type'] == 'ET_DYN'

        # Don't set the so info's dwarf_info initially, only when
        # symbol lookup is first required
        self._dwarf_info = None

    @property
    def dwarf_info(self):
        """DWARF info of the file, loaded on first access and then cached."""
        if self._dwarf_info is None:
            self._set_dwarf_info()

        return self._dwarf_info

    def _set_elf_file(self):
        """Open ``self.path`` and store the parsed ``ELFFile``.

        The file handle is not closed here: ``self.elf_file`` is still
        queried after this method returns (presumably it reads from the
        stream on demand).
        """
        try:
            binary_file = open(self.path, 'rb')
            self.elf_file = ELFFile(binary_file)
        except IOError:
            print('Failed to open ' + self.path, file=sys.stderr)
            sys.exit(-1)

    def _set_dwarf_info(self):
        """Load the DWARF info; exit with status -1 when there is none."""
        if not self.elf_file.has_dwarf_info():
            print('Binary ' + self.path + ' has no DWARF info',
                  file=sys.stderr)
            sys.exit(-1)

        self._dwarf_info = self.elf_file.get_dwarf_info()

    def _get_mem_size(self):
        """Return the summed size of all ``PT_LOAD`` segments.

        Each segment's ``p_memsz`` is rounded up to a multiple of its
        ``p_align`` before being added.
        """
        mem_size = 0
        for segment in self.elf_file.iter_segments():
            if segment['p_type'] != 'PT_LOAD':
                continue

            alignment = segment['p_align']
            segment_size = segment['p_memsz']
            # p_align values 0 and 1 mean "no alignment required"; dividing
            # by 0 would raise, so only round when there is a real alignment.
            if alignment > 1:
                # Integer ceiling division: exact for any size, no floats.
                segment_size = -(-segment_size // alignment) * alignment
            mem_size += segment_size

        return mem_size
Ejemplo n.º 47
0
def process_file(filename):
    """Print every location list referenced by a DIE attribute of an ELF file.

    filename:
        path of the ELF file; opened in binary mode.

    Prints a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        dwarf = elf.get_dwarf_info()

        # Location lists come from the .debug_loc section.
        loclists = dwarf.location_lists()

        # Needed so that register names in DWARF expressions are decoded for
        # the right architecture.
        set_global_machine_arch(elf.get_machine_arch())

        for unit in dwarf.iter_CUs():
            print('  Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length']))

            for entry in unit.iter_DIEs():
                for attr in itervalues(entry.attributes):
                    if not attribute_has_location_list(attr):
                        continue

                    # The attribute value is an offset into .debug_loc; the
                    # location lists object decodes the list stored there.
                    decoded = loclists.get_location_list_at_offset(attr.value)
                    rendered = show_loclist(decoded, dwarf, indent='      ')
                    print('   DIE %s. attr %s.\n%s' % (
                        entry.tag, attr.name, rendered))
Ejemplo n.º 48
0
def process_file(filename):
    """Print every range list referenced by a DIE attribute of an ELF file.

    filename:
        path of the ELF file; opened in binary mode.

    Prints a notice and returns early when the file has no DWARF info or no
    range lists.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        dwarf = elf.get_dwarf_info()

        # Range lists come from the .debug_ranges section.
        ranges = dwarf.range_lists()
        if ranges is None:
            print('  file has no .debug_ranges section')
            return

        for unit in dwarf.iter_CUs():
            print('  Found a compile unit at offset {0!s}, length {1!s}'.format(
                unit.cu_offset, unit['unit_length']))

            for entry in unit.iter_DIEs():
                for attr in itervalues(entry.attributes):
                    if not attribute_has_range_list(attr):
                        continue

                    # The attribute value is an offset into .debug_ranges;
                    # the range lists object decodes the list stored there.
                    decoded = ranges.get_range_list_at_offset(attr.value)
                    print('   DIE {0!s}. attr {1!s}.\n{2!s}'.format(
                        entry.tag, attr.name, decoded))
Ejemplo n.º 49
0
def process_file(filename, outfile):
    """Emit serializers for "object_t" from each CU of an ELF file.

    filename:
        path of the ELF file; opened in binary mode.
    outfile:
        path of the text file the serializers are written to.

    Raises ``IOError`` when the ELF file has no DWARF info.
    """
    with open(filename, 'rb') as elf_stream:
        elf = ELFFile(elf_stream)
        if not elf.has_dwarf_info():
            raise IOError("ERROR: {} has no DWARF info".format(filename))

        # The DWARFInfo object is the entry point for walking compile units.
        dwarf = elf.get_dwarf_info()

        with open(outfile, 'w') as sink:
            for unit in dwarf.iter_CUs():
                # NOTE(review): the result is unused here; the call is kept
                # in case it has side effects - confirm before removing.
                dies = get_dies_by_offset(unit)
                wanted = get_serialization_types(["object_t"], unit)

                emit.emit_serializers(sink, wanted)
Ejemplo n.º 50
0
def main():
    """List the inlined functions recorded in an ELF file's DWARF info.

    Command line:
        filename       ELF file to inspect (default: a.out)
        --ignore P     skip functions whose file name starts with P
                       (may be given several times)
        --demangle     print the unmangled linkage name
        --declaration  prefix each name with "<file>:<line> "

    One line is printed per distinct function, in sorted order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', nargs='?', default='a.out')
    parser.add_argument('--ignore', action='append', default=[])
    parser.add_argument('--demangle', action='store_true')
    parser.add_argument('--declaration', action='store_true')
    args = parser.parse_args()

    filename = args.filename
    # str.startswith accepts a tuple of prefixes
    ignore = tuple(args.ignore)

    with open(filename, 'rb') as input_file:
        elf = ELFFile(input_file)
        dwarf = elf.get_dwarf_info()

        def iter_inlined():
            """Yield the inlined, named functions that are not ignored."""
            for cu in dwarf.iter_CUs():
                for func in iter_functions(cu):
                    # only include inlined functions
                    if not func.is_inlined:
                        continue
                    # skip functions with no filename assigned
                    if not func.filename:
                        continue
                    # skip functions from ignored files
                    if ignore and func.filename.startswith(ignore):
                        continue
                    # skip unnamed functions
                    if not func.linkage_name:
                        continue
                    yield func

        functions = sorted(set(iter_inlined()))

        for func in functions:
            if args.demangle:
                name = func.linkage_name_unmangled
            else:
                name = func.linkage_name

            # A single parenthesised argument prints identically on Python 2
            # and Python 3, unlike the former print statements.
            if args.declaration:
                print('%s:%i %s' % (func.filename, func.line, name))
            else:
                print(name)
Ejemplo n.º 51
0
def process_file(filename, address):
    """Print the function, source file and line that contain `address`.

    filename:
        path of the ELF file; opened in binary mode.
    address:
        passed unchanged to ``decode_funcname`` and ``decode_file_line``.

    Prints a notice and returns early when the file has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo object is what both decoders work on.
        dwarf = elf.get_dwarf_info()

        func_name = decode_funcname(dwarf, address)
        src_file, src_line = decode_file_line(dwarf, address)

        print('Function:', bytes2str(func_name))
        print('File:', bytes2str(src_file))
        print('Line:', src_line)
Ejemplo n.º 52
0
def test_files(fns,quiet=False,profile=False):
  for fn in fns:
    try:
      elf = ELFFile(open(fn))
    except ELFError:
      if not quiet:
        print "Skipping non-ELF file:",fn
      continue

    if not elf.has_dwarf_info():
      if not quiet:
        print "No dwarf info for {}.".format(fn)
      continue

    dwarfinfo = elf.get_dwarf_info()
    dwarf_functions = get_functions(dwarfinfo)

    engine_functions = {}
    for engine in ENGINES:
      this_engine = Static(fn, debug=0, static_engine=engine) #no debug output
      if args.profile:
        #needs pycallgraph
        from pycallgraph import PyCallGraph
        from pycallgraph.output import GraphvizOutput
        graphviz = GraphvizOutput()
        graphviz.output_file = 'prof.png'
        with PyCallGraph(output=graphviz):
          this_engine.process()
      else:
        this_engine.process()
      engine_functions[engine] = {x.start for x in this_engine['functions']}

    for engine,functions in engine_functions.iteritems():
      missed = dwarf_functions - functions
      total_fxns = len(dwarf_functions)
      short_fn = fn.split("/")[-1] if "/" in fn else fn
      if len(missed) == 0:
        print "{} {}: {} found all {} function(s).".format(ok_green, short_fn, engine, total_fxns)
      else:
        fmt = "{} {}: {} missed {}/{} functions: {}."
        print fmt.format(warn, short_fn, engine,
                len(missed), total_fxns, ", ".join(hex(fxn) for fxn in missed))
Ejemplo n.º 53
0
def get_files_from_executable(filename):
    """Return the full path of each compile unit's source file.

    filename:
        path of the executable; opened in binary mode.

    Returns a list with ``get_full_path()`` of every compile unit's top DIE.
    The list is empty when the file is not a valid ELF file or carries no
    DWARF info; both cases are logged at INFO level.
    """
    with open(filename, 'rb') as f:
        # ELFFile looks for magic number, if there's none, ELFError is raised
        try:
            elffile = ELFFile(f)
        except ELFError:
            logging.info("%s is invalid elf file", filename)
            return []

        if not elffile.has_dwarf_info():
            logging.info("File does not have dwarf info, no sources in the project file")
            # Same "nothing found" value as the invalid-ELF case, so callers
            # can always iterate over the result.
            return []
        dwarfinfo = elffile.get_dwarf_info()

        # Walk the compile units while the file is still open.
        return [CU.get_top_DIE().get_full_path()
                for CU in dwarfinfo.iter_CUs()]
Ejemplo n.º 54
0
def get_executable_src_files(exec_path):
    """Collect the source file paths recorded in an executable's DWARF info.

    Args:
        exec_path: absolute path of the executable (asserted).

    Returns:
        A list with one decoded source path per compile unit.  An empty list
        is returned (after printing a diagnostic) when the file cannot be
        parsed as ELF or has no DWARF information.

    Raises:
        AssertionError: if ``exec_path`` is not absolute, or if a compile
            unit has a relative name and no usable ``DW_AT_comp_dir``.
    """
    assert (os.path.isabs(exec_path))

    exec_src_paths = []
    with open(exec_path, 'rb') as elf_file_handle:
        try:
            elf_file = ELFFile(elf_file_handle)
        except Exception:
            # Still best-effort, but no longer swallows KeyboardInterrupt or
            # SystemExit the way a bare ``except:`` does.
            print ('-- Executable \'' + exec_path + '\' is not an ELF file')
            return []

        if not elf_file.has_dwarf_info():
            print ('-- Executable \'' + exec_path + '\' has no DWARF information')
            return []

        dwarf_info = elf_file.get_dwarf_info()
        for CU in dwarf_info.iter_CUs():
            DIE = CU.get_top_DIE()

            # Byte-string defaults: the attribute values are joined with
            # os.path and decoded below, so the "missing" placeholders must
            # be the same type for the emptiness assert to mean anything.
            name = b''
            comp_dir = b''
            for attr in itervalues(DIE.attributes):
                if attr.name == 'DW_AT_name':
                    name = attr.value
                if attr.name == 'DW_AT_comp_dir':
                    comp_dir = attr.value

            # If the source path in the executable is not an absolute
            # path then use the DW_AT_comp_dir attribute to get the
            # build directory to make it absolute, then use realpath to
            # resolve any symbolic links.
            # NOTE(review): paths that are already absolute are NOT passed
            # through realpath - confirm whether that is intended.
            src_path = name
            if not os.path.isabs(name):
                assert (comp_dir != b'')
                src_path = os.path.join(comp_dir, name)
                assert(os.path.isabs(src_path))
                src_path = os.path.realpath(src_path)
            exec_src_paths.append(src_path.decode())
    return exec_src_paths
Ejemplo n.º 55
0
def get_producer(debugfile, dwzfile, fast):
    """Collect the ``DW_AT_producer`` values found in a debug file.

    Args:
        debugfile: open binary stream handed to ``ELFFile``.
        dwzfile: passed through to ``get_dwz`` to resolve producers stored
            with form ``DW_FORM_GNU_strp_alt``.
        fast: when true, stop after the first compile unit whose producer
            was successfully recorded.

    Returns:
        A set of producer values.  Compile units without a producer, with an
        unsupported attribute form, or whose producer cannot be resolved are
        skipped.
    """
    elffile = ELFFile(debugfile)
    dwarfinfo = elffile.get_dwarf_info()

    producers = set()

    for CU in dwarfinfo.iter_CUs():
        # Start with the top DIE, the root for this CU's DIE tree
        top_DIE = CU.get_top_DIE()
        attr = top_DIE.attributes.get('DW_AT_producer')
        if attr is None:
            continue

        try:
            if attr.form == 'DW_FORM_GNU_strp_alt':
                producers.add(get_dwz(dwzfile, offset=attr.value))
            elif attr.form == 'DW_FORM_strp':  # lucky ;)
                producers.add(attr.value)
            else:
                # Unsupported form: skip explicitly.  This used to be an
                # ``assert 0`` caught by a bare except, which under ``-O``
                # fell through to the ``fast`` break with nothing recorded.
                continue
        except Exception:
            # Resolution stays best-effort: a failing lookup skips this CU.
            continue

        if fast:  # one producer is enough ;(
            break

    return producers
Ejemplo n.º 56
0
def process_file(filename):
    """Patch the function table of an ELF file in place.

    Opens ``filename`` for binary update, builds the symbol table and type
    map from its DWARF info, seeks to the file position of the ``FTABLE``
    symbol and writes one entry per named function, ordered by symbol
    offset.

    Exits the process with status 1 when the symbol table or the ``FTABLE``
    symbol cannot be found.

    Args:
        filename: path of the ELF file to modify.
    """
    # The context manager closes the file on every path, including the
    # sys.exit() calls below (SystemExit unwinds through the with block).
    with open(filename, 'r+b') as f:
        elffile = ELFFile(f)
        symtab, ftable_addr = get_symtab(elffile)

        if symtab is None:
            print("Cannot find symbol table. Compiled without debug symbols?")
            sys.exit(1)

        if ftable_addr is None:
            print("The provided file does not contain symbol `%s'" % FTABLE)
            print("Please ensure there is a reference to `%s' in traceback.c" % FTABLE)
            sys.exit(1)

        rodata_addr, rodata_off = find_rodata(elffile)

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        typemap = dict()

        process_types(dwarfinfo, typemap)
        process_funcs(dwarfinfo, symtab, typemap)

        # Translate the table's address into a file position using the
        # address/offset pair returned by find_rodata.
        f.seek(ftable_addr - rodata_addr + rodata_off)

        written = 0
        for func in sorted(symtab, key=lambda x: symtab[x].offset):
            if len(func) == 0:
                continue
            # NOTE(review): this lets FUNCTS_MAX_NUM + 1 entries through
            # before stopping - confirm the table really has that capacity.
            if written > FUNCTS_MAX_NUM:
                break
            write_func(f, func, symtab[func])
            written += 1
Ejemplo n.º 57
0
class Image(object):
    """An ELF image with its DWARF debug information indexed for lookup.

    When the file carries DWARF info, construction builds:

    * ``_compile_units`` - per compile unit (keyed by ``DW_AT_name``): line
      program entries, low/high pc, compilation directory, functions,
      file-scope variables and a ``lines`` map (line number -> addresses);
    * ``_addresses`` - address -> ``"<cu name>+<line>"``;
    * ``_lowest_known_address`` - smallest compile-unit low pc (or None);
    * ``_cfa_rule`` - pc -> decoded CFI table row (only when CFI exists).
    """

    def __init__(self, fname):
        """Parse the ELF file at ``fname`` and index its DWARF info, if any."""
        # ELF is a binary format: always open in binary mode so that no
        # newline translation can alter the bytes being parsed.
        if platform.system() == "Windows":
            # The handle is handed to ELFFile as its stream and is therefore
            # intentionally left open.
            elf_data = open(fname, "rb")
        else:
            with open(fname, "rb") as f:
                elf_data = StringIO(f.read())

        self.elf = ELFFile(elf_data)
        if self.elf.has_dwarf_info():
            self.dwarf = self.elf.get_dwarf_info()
            set_global_machine_arch(self.elf.get_machine_arch())
            self.__tame_dwarf()
            self.get_expr_evaluator = lambda: ExprLiveEval(self)

    @property
    def executable(self):
        """``(base_addr, image_bytes)`` of the loadable code, built lazily."""
        try:
            return self._exe
        except AttributeError:
            # First access: build and cache.
            self._exe = self._build_executable()
        return self._exe

    def _build_executable(self):
        """Concatenate the leading contiguous loadable PROGBITS sections.

        Section 1 must have bit 0x2 set in ``sh_flags`` (SHF_ALLOC in the
        ELF specification) and be of type SHT_PROGBITS.  Section 2, and then
        section 3, are appended when they satisfy the same test; each must
        start exactly where the image built so far ends.

        Returns:
            Tuple ``(base_addr, img)``: section 1's address and the data.

        Raises:
            AssertionError: section 1 is not a loadable PROGBITS section.
            Exception: an appended section is not contiguous.
        """
        s = self.elf.get_section(1)
        assert s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS"
        base_addr = s.header["sh_addr"]

        img = s.data()

        s = self.elf.get_section(2)
        if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
            if s.header["sh_addr"] != base_addr + len(img):
                raise Exception("bad section vaddr - #2 should follow #1")

            img += s.data()

            # Section 3 is only considered when section 2 was appended.
            s = self.elf.get_section(3)
            print("%s" % str(s.header))
            if s.header["sh_flags"] & 2 and s.header["sh_type"] == "SHT_PROGBITS":
                if s.header["sh_addr"] != base_addr + len(img):
                    raise Exception("bad section vaddr - #3 should follow #2")

                img += s.data()

        return (base_addr, img)

    @staticmethod
    def _die_location(die, location_lists):
        """Return the ``DW_AT_location`` of ``die``, or ``[]`` if unavailable.

        Values whose form is ``DW_FORM_sec_offset`` or ``DW_FORM_data4`` are
        treated as offsets and resolved through ``location_lists``; any other
        form is returned as stored.  Lookup is best-effort: a missing
        attribute or a failing resolution yields an empty list.
        """
        try:
            attr = die.attributes['DW_AT_location']
            if attr.form in ('DW_FORM_sec_offset', 'DW_FORM_data4'):
                return location_lists.get_location_list_at_offset(attr.value)
            return attr.value
        except Exception:
            return []

    def __tame_dwarf(self):
        """Build the lookup tables described in the class docstring."""
        dw = self.dwarf
        self._compile_units = {}
        self._addresses = {}
        self._lowest_known_address = None

        location_lists = dw.location_lists()

        # Prefer EH CFI, fall back to regular CFI.
        cfi = None
        if dw.has_EH_CFI():
            cfi = dw.EH_CFI_entries()
            print("we have EH CFI entries")
        elif dw.has_CFI():
            cfi = dw.CFI_entries()
            print("we have CFI entries")
        else:
            print("no (EH) CFI")

        if cfi is not None:
            self._cfa_rule = {}
            for c in cfi:
                try:
                    decoded = c.get_decoded()
                except Exception:
                    # Stop at the first entry that cannot be decoded; rules
                    # gathered so far are kept.
                    print("CFI decoding exception")
                    break

                for entry in decoded.table:
                    pc = entry["pc"]
                    if pc in self._cfa_rule:
                        print("duplicate cfa rule found at pc %x" % pc)
                        print("\t%s" % str(self._cfa_rule[pc]))
                        print("\t%s" % str(entry))
                        print("")
                    # The last rule seen for a pc wins.
                    self._cfa_rule[pc] = entry

        for c in dw.iter_CUs():
            functions = {}
            variables = {}

            td = c.get_top_DIE()

            for d in td.iter_children():
                if d.tag == 'DW_TAG_subprogram':
                    if 'DW_AT_declaration' in d.attributes:
                        continue
                    lpc = d.attributes['DW_AT_low_pc'].value
                    hpc = d.attributes['DW_AT_high_pc'].value
                    # A high pc below the low pc is taken to be an offset
                    # from the low pc rather than an address.
                    if hpc < lpc:
                        hpc += lpc

                    function_name = d.attributes['DW_AT_name'].value
                    f = {"lpc": lpc, "hpc": hpc, "args": {}, "vars": {}}
                    if 'DW_AT_frame_base' in d.attributes:
                        a = d.attributes['DW_AT_frame_base']
                        if a.form == 'DW_FORM_data4' or a.form == 'DW_FORM_sec_offset':
                            f["fb"] = location_lists.get_location_list_at_offset(a.value)
                        else:
                            f["fb"] = a.value

                    for child in d.iter_children():
                        if child.tag == "DW_TAG_formal_parameter":
                            bucket = f["args"]
                        elif child.tag == "DW_TAG_variable":
                            bucket = f["vars"]
                        else:
                            continue
                        name = child.attributes['DW_AT_name'].value
                        bucket[name] = {
                            "location": self._die_location(child, location_lists),
                        }

                    functions[function_name] = f
                elif d.tag == 'DW_TAG_variable':
                    # Only variables whose DW_AT_decl_file is 1 are recorded.
                    if d.attributes['DW_AT_decl_file'].value == 1:
                        try:
                            name = d.attributes['DW_AT_name'].value
                        except Exception:
                            # NOTE(review): this fallback indexes DW_AT_name
                            # again, so it raises if the attribute is absent.
                            name = '(%s)' % str(d.attributes['DW_AT_name'])

                        # File-scope variables keep the raw attribute value;
                        # location-list offsets are not resolved here.
                        try:
                            location = d.attributes['DW_AT_location'].value
                        except KeyError:
                            location = []
                        variables[name] = {"location": location}

            fname = td.attributes['DW_AT_name'].value
            x = {
                "line_program": dw.line_program_for_CU(c).get_entries(),
                "lpc": td.attributes['DW_AT_low_pc'].value,
                "hpc": td.attributes['DW_AT_high_pc'].value,
                "comp_dir": td.attributes['DW_AT_comp_dir'].value,
                "functions": functions,
                "variables": variables,
            }

            self._compile_units[fname] = x
            if ((self._lowest_known_address is None) or
                    (self._lowest_known_address > x["lpc"])):
                self._lowest_known_address = x["lpc"]

        for c, unit in self._compile_units.items():
            lines = unit["lines"] = {}
            for line in unit["line_program"]:
                state = line.state
                if state is None:
                    continue
                if (state.end_sequence or state.basic_block or
                        state.epilogue_begin or state.prologue_end):
                    continue
                cl = "%s+%d" % (c, state.line)
                if state.address in self._addresses and self._addresses[state.address] != cl:
                    raise Exception("addr %x is both \"%s\" and \"%s+%d\"" % (state.address, self._addresses[state.address], c, state.line))
                self._addresses[state.address] = cl
                lines.setdefault(state.line, []).append(state.address)

        if cfi is not None:
            print("CFA table:")
            for pc in sorted(self._cfa_rule.keys()):
                print("%x: %s\t\t(%s)" % (pc, str(self._cfa_rule[pc]), self.addr2line(pc)))

    def addr2line(self, addr):
        """Return ``"<cu name>+<line>"`` for ``addr``, or ``''`` if unknown."""
        try:
            return self._addresses[addr]
        except (AttributeError, KeyError, TypeError):
            # AttributeError: no DWARF info was indexed for this image.
            return ''

    def loc_at(self, addr):
        """Describe the source location covering ``addr``.

        When ``addr`` itself has no line entry, the lookup walks backwards in
        steps of 4 until an entry is found or the lowest known address has
        been passed.

        Returns:
            ``(function_name, cu_name, line, comp_dir)`` where ``line`` is a
            string and ``function_name`` is ``""`` when no function range
            contains the resolved address; ``("unknown", "", 0, "")`` when
            nothing could be resolved.
        """
        line = self.addr2line(addr)
        lowest = self._lowest_known_address
        # With no compile units there is no lower bound to stop at, so do
        # not start walking backwards at all.
        if lowest is not None:
            while '' == line and addr >= lowest:
                addr -= 4
                line = self.addr2line(addr)
        if '' == line:
            return ("unknown", "", 0, "")

        # Split on the last '+' only: the compile-unit name may contain one.
        cuname, culine = line.rsplit("+", 1)
        fname = ""
        c = self._compile_units[cuname]
        for f in c["functions"]:
            if ((c["functions"][f]["lpc"] <= addr) and
                    (c["functions"][f]["hpc"] >= addr)):
                fname = f
                break
        return (fname, cuname, culine, c["comp_dir"])

    def line2addr(self, fname, line):
        """Return the addresses recorded for ``line`` of compile unit ``fname``.

        Raises:
            KeyError: unknown compile unit or line.
        """
        return self._compile_units[fname]["lines"][line]
Ejemplo n.º 58
0
# please ignore it!
#
from __future__ import print_function

import sys, pprint
from elftools.elf.structs import ELFStructs
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import *

from elftools.elf.relocation import *


from elftools.dwarf.locationlists import LocationLists
from elftools.dwarf.descriptions import describe_DWARF_expr

# Scratch script: dump basic ELF header facts and every DWARF location list
# of a test binary.  The stream is opened in a with block so it is closed
# once the dump is finished.
with open('test/testfiles/exe_simple64.elf', 'rb') as stream:
    efile = ELFFile(stream)
    print('elfclass', efile.elfclass)
    print('===> %s sections!' % efile.num_sections())
    print(efile.header)

    dinfo = efile.get_dwarf_info()
    # Location lists are read from the stream of the debug-loc section
    # object exposed by the DWARF info.
    llists = LocationLists(dinfo.debug_loc_sec.stream, dinfo.structs)
    for loclist in llists.iter_location_lists():
        print('----> loclist!')
        for li in loclist:
            print(li)
            print(describe_DWARF_expr(li.loc_expr, dinfo.structs))