Python DebugSectionDescriptor Examples

Programming Language: Python

Namespace/Package Name: elftools.dwarf.dwarfinfo

Examples at hotexamples.com: 6

Python DebugSectionDescriptor - 6 examples found. These are the top rated real world Python examples of elftools.dwarf.dwarfinfo.DebugSectionDescriptor extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DebugSectionDescriptor(6)

Frequently Used Methods

DebugSectionDescriptor (6)

Example #1

Show file

File: test_refaddr_bitness.py Project: woodruffw-forks/pyelftools

    def test_main(self):
        """Parse saved ARM64/DWARFv2 section dumps and check DIE parsing.

        Builds a DWARFInfo from three raw section files (info, abbrev, str)
        and checks that a CU known to contain a DW_FORM_ref_addr reference
        is parsed with a 32-bit DWARF format, 8-byte addresses and 15 DIEs.
        """
        def load(dat_name):
            # Read one saved section as a bytestring
            dat_path = os.path.join('test', 'testfiles_for_unittests',
                                    dat_name)
            with open(dat_path, 'rb') as dat_file:
                return dat_file.read()

        def describe(section_name, blob):
            # Wrap raw bytes in the descriptor shape DWARFInfo expects
            return DebugSectionDescriptor(io.BytesIO(blob), section_name,
                                          None, len(blob), 0)

        info_bytes = load('arm64_on_dwarfv2.info.dat')
        abbrev_bytes = load('arm64_on_dwarfv2.abbrev.dat')
        str_bytes = load('arm64_on_dwarfv2.str.dat')

        # Parse the DWARF info; only info/abbrev/str sections are supplied
        dwarf_config = DwarfConfig(little_endian=True,
                                   default_address_size=8,
                                   machine_arch="ARM64")
        di = DWARFInfo(
            config=dwarf_config,
            debug_info_sec=describe('__debug_info', info_bytes),
            debug_aranges_sec=None,
            debug_abbrev_sec=describe('__debug_abbrev', abbrev_bytes),
            debug_frame_sec=None,
            eh_frame_sec=None,
            debug_str_sec=describe('__debug_str', str_bytes),
            debug_loc_sec=None,
            debug_ranges_sec=None,
            debug_line_sec=None,
            debug_pubtypes_sec=None,
            debug_pubnames_sec=None,
            debug_addr_sec=None,
            debug_str_offsets_sec=None,
            debug_line_str_sec=None,
        )

        all_cus = list(di.iter_CUs())
        # CU #21 is known to hold a reference in DW_FORM_ref_addr form
        target_cu = all_cus[21]
        self.assertEqual(target_cu['version'], 2)
        # The target address size (8) must be kept distinct from the DWARF
        # inter-DIE offset size (32-bit format)
        self.assertEqual(target_cu.structs.dwarf_format, 32)
        self.assertEqual(target_cu['address_size'], 8)
        # Before the patch, DIE #2 was misparsed, which threw off the stream
        # offset and caused every following DIE to be misparsed as well
        parsed_dies = list(target_cu.iter_DIEs())
        self.assertEqual(len(parsed_dies), 15)

Example #2

Show file

def fake_section(filename, section_name):
    """Build a DebugSectionDescriptor from a section dump stored on disk.

    The dump path is ``filename`` with its extension stripped and
    ``section_name`` appended directly (no separator is inserted). The
    whole file is read into memory and exposed as a BytesIO stream with a
    global offset of 0 and a size equal to the number of bytes read.
    """
    base_path, _extension = os.path.splitext(filename)
    dump_path = base_path + section_name

    with open(dump_path, 'rb') as dump_file:
        payload = dump_file.read()

    return DebugSectionDescriptor(
        stream=io.BytesIO(payload),
        name=section_name,
        global_offset=0,
        size=len(payload),
    )

Example #3

Show file

def read_macho(filename, resolve_arch, friendly_filename):
    """Load DWARF info from a Mach-O file, thin or fat.

    Args:
        filename: path handed to ``filebytes.mach_o.MachO``.
        resolve_arch: callable invoked for fat binaries with the list of
            slice architecture names; it returns the index of the slice to
            load, or None to cancel.
        friendly_filename: only referenced by the commented-out section
            dumping call below; otherwise unused here.

    Returns:
        False when ``resolve_arch`` returns None (user cancellation),
        None when the selected binary has no ``__debug_info`` section,
        otherwise a DWARFInfo with ``_format``, ``_fat_arch`` and
        ``_start_address`` attributes attached.

    Raises:
        KeyError: if ``__debug_info`` is present but ``__debug_abbrev`` or
            ``__debug_str`` is missing.
    """
    from filebytes.mach_o import MachO, CpuType, TypeFlags, LC
    segment_cmds = (LC.SEGMENT, LC.SEGMENT_64)
    fat_arch = None
    macho = MachO(filename)
    if macho.isFat:
        slices = [make_macho_arch_name(arch) for arch in macho.fatArches]
        arch_no = resolve_arch(slices)
        if arch_no is None: # User cancellation
            return False
        fat_arch = slices[arch_no]
        macho = macho.fatArches[arch_no]

    # We proceed with macho being a arch-specific file, or a slice within a fat binary
    data = {
        section.name: DebugSectionDescriptor(io.BytesIO(section.bytes), section.name, None, len(section.bytes), 0)
        for cmd in macho.loadCommands
        if cmd.header.cmd in segment_cmds
        for section in cmd.sections
        if section.name.startswith('__debug')
    }

    #macho_save_sections(friendly_filename, macho)

    if '__debug_info' not in data:
        return None

    cpu = macho.machHeader.header.cputype
    di = DWARFInfo(
        config = DwarfConfig(
            little_endian=True,
            default_address_size = 8 if (cpu & TypeFlags.ABI64) != 0 else 4,
            machine_arch = make_macho_arch_name(macho)
        ),
        debug_info_sec = data['__debug_info'],
        debug_aranges_sec = data.get('__debug_aranges'),
        debug_abbrev_sec = data['__debug_abbrev'],
        debug_frame_sec = data.get('__debug_frame'),
        eh_frame_sec = None, # Haven't seen those in Mach-O
        debug_str_sec = data['__debug_str'],
        debug_loc_sec = data.get('__debug_loc'),
        debug_ranges_sec = data.get('__debug_ranges'),
        debug_line_sec = data.get('__debug_line'),
        debug_pubtypes_sec = data.get('__debug_pubtypes'), #__debug_gnu_pubt?
        # Was looked up under '__debug_pubtypes', so the pubnames section
        # was never passed through.
        debug_pubnames_sec = data.get('__debug_pubnames'), #__debug_gnu_pubn?
    )
    # NOTE(review): 1 presumably tags the container format as Mach-O for
    # the caller - confirm against the consumers of `_format`.
    di._format = 1
    di._fat_arch = fat_arch
    # Start address is the vmaddr of the __TEXT segment, or 0 if there is none
    text_cmd = next((cmd for cmd in macho.loadCommands if cmd.header.cmd in segment_cmds and cmd.name == "__TEXT"), False)
    di._start_address = text_cmd.header.vmaddr if text_cmd else 0
    return di

Example #4

Show file

def read_pe(filename):
    """Load DWARF info from the ``.debug*`` sections of a PE file.

    Returns None when the file has no ``.debug_info`` section, otherwise a
    DWARFInfo with ``_format`` set to 2. All sections other than
    ``.debug_info`` are optional and passed as None when absent.
    """
    from filebytes.pe import PE, IMAGE_FILE_MACHINE

    pefile = PE(filename)

    data = {}
    for section in pefile.sections:
        name = section.name
        if not name.startswith('.debug'):
            continue
        # Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28
        virtual_size = section.header.PhysicalAddress_or_VirtualSize
        raw_size = section.header.SizeOfRawData
        if virtual_size == 0:
            effective_size = raw_size
        else:
            effective_size = min((raw_size, virtual_size))
        data[name] = DebugSectionDescriptor(
            io.BytesIO(section.bytes), name, None, effective_size, 0)

    if '.debug_info' not in data:
        return None

    machine = pefile.imageNtHeaders.header.FileHeader.Machine
    # There are also some exotic architectures...
    wide_machines = (IMAGE_FILE_MACHINE.AMD64,
                     IMAGE_FILE_MACHINE.ARM64,
                     IMAGE_FILE_MACHINE.IA64)
    address_size = 8 if machine in wide_machines else 4

    config = DwarfConfig(
        little_endian = True,
        default_address_size = address_size,
        machine_arch = IMAGE_FILE_MACHINE[machine].name
    )
    di = DWARFInfo(
        config = config,
        debug_info_sec = data['.debug_info'],
        debug_aranges_sec = data.get('.debug_aranges'),
        debug_abbrev_sec = data.get('.debug_abbrev'),
        debug_frame_sec = data.get('.debug_frame'),
        eh_frame_sec = None, # Haven't see one in the wild so far
        debug_str_sec = data.get('.debug_str'),
        debug_loc_sec = data.get('.debug_loc'),
        debug_ranges_sec = data.get('.debug_ranges'),
        debug_line_sec = data.get('.debug_line'),
        debug_pubtypes_sec = data.get('.debug_pubtypes'),
        debug_pubnames_sec = data.get('.debug_pubnames'),
    )
    di._format = 2
    return di

Example #5

Show file

    def _read_dwarf_section(self, section, relocate_dwarf_sections):
        """
        Copy a DWARF section's data into a fresh in-memory stream and wrap
        it in a DebugSectionDescriptor. When relocate_dwarf_sections is
        true and relocations exist for the section, they are applied to the
        copy before it is returned.
        """
        # Work on a private copy of the section contents
        stream_copy = BytesIO()
        stream_copy.write(section.get_data())

        relocations = None
        if relocate_dwarf_sections:
            handler = RelocationHandler(self)
            relocations = handler.find_relocations_for_section(section)
        if relocations is not None:
            handler.apply_section_relocations(stream_copy, relocations)

        section_name = section.name
        file_offset = section.PointerToRawData
        raw_size = section.SizeOfRawData
        start_rva = section.get_rva_from_offset(0)
        return DebugSectionDescriptor(
            stream=stream_copy,
            name=section_name,
            global_offset=file_offset,
            size=raw_size,
            address=start_rva)

Example #6

Show file

def _decoded_name_or_none(die):
    """Return the DIE's DW_AT_name decoded as UTF-8, or None if absent."""
    attribute = die.attributes.get('DW_AT_name', None)
    if attribute is None:
        return None
    return attribute.value.decode('utf-8')


def _attribute_value_or_none(attributes, attribute_name):
    """Return ``attributes[attribute_name].value``, or None if absent."""
    attribute = attributes.get(attribute_name, None)
    if attribute is None:
        return None
    return attribute.value


def process_file(filename):
    """Extract type and variable information from a COFF file's DWARF data.

    The file is loaded with ``epyqlib.ticoff.Coff``, its ``.debug_*``
    sections are handed to pyelftools, and the DIEs collected by
    ``die_info_rec`` are converted into this module's model objects
    (``Type``, ``Struct``, ``Union``, ``TypeDef``, ...). Every converted
    object is indexed by its DIE offset so that raw ``DW_AT_type`` offsets
    can afterwards be replaced by references to the objects themselves.

    Args:
        filename: path of the COFF file to read.

    Returns:
        A tuple ``(names, variables, bits_per_byte)`` where ``names`` is a
        ``defaultdict(list)`` mapping a name (possibly None) to the
        converted objects carrying it, and ``variables`` is the list of
        ``Variable`` objects whose location is exactly five bytes long.

    Raises:
        KeyError: if a referenced type offset does not correspond to any
            converted object.
    """
    logging.debug('Processing file: {}'.format(filename))
    logging.debug('Working directory: {}'.format(os.getcwd()))

    coff = epyqlib.ticoff.Coff()
    coff.from_file(filename)

    debug_sections = {
        s.name: DebugSectionDescriptor(stream=io.BytesIO(s.data),
                                       name=s.name,
                                       global_offset=0,
                                       size=len(s.data))
        for s in coff.sections if s.name.startswith('.debug_')
    }

    from elftools.dwarf.dwarfinfo import DWARFInfo, DwarfConfig
    dwarfinfo = DWARFInfo(
        config=DwarfConfig(little_endian=True,
                           default_address_size=4,
                           machine_arch='<unknown>'),
        debug_info_sec=debug_sections.get('.debug_info', None),
        debug_aranges_sec=debug_sections.get('.debug_aranges', None),
        debug_abbrev_sec=debug_sections.get('.debug_abbrev', None),
        debug_frame_sec=debug_sections.get('.debug_frame', None),
        # TODO(eliben): reading of eh_frame is not hooked up yet
        eh_frame_sec=None,
        debug_str_sec=debug_sections.get('.debug_str', None),
        debug_loc_sec=debug_sections.get('.debug_loc', None),
        debug_ranges_sec=debug_sections.get('.debug_ranges', None),
        debug_line_sec=debug_sections.get('.debug_line', None))

    # One bucket per tag of interest; filled by die_info_rec
    objects = collections.OrderedDict((tag, []) for tag in [
        'DW_TAG_subprogram', 'DW_TAG_variable', 'DW_TAG_typedef',
        'DW_TAG_base_type', 'DW_AT_encoding', 'DW_TAG_structure_type',
        'DW_TAG_union_type', 'DW_TAG_ptr_to_member_type',
        'DW_TAG_enumeration_type', 'DW_TAG_pointer_type', 'DW_TAG_array_type',
        'DW_TAG_volatile_type', 'DW_TAG_const_type', 'DW_TAG_restrict_type',
        'DW_TAG_lo_user', 'DW_TAG_hi_user', 'DW_TAG_unspecified_type',
        'DW_TAG_subroutine_type'
    ])

    for CU in dwarfinfo.iter_CUs():
        # DWARFInfo allows to iterate over the compile units contained in
        # the .debug_info section. CU is a CompileUnit object, with some
        # computed attributes (such as its offset in the section) and
        # a header which conforms to the DWARF standard. The access to
        # header elements is, as usual, via item-lookup.
        logging.debug('  Found a compile unit at offset %s, length %s' %
                      (CU.cu_offset, CU['unit_length']))

        # Start with the top DIE, the root for this CU's DIE tree
        top_DIE = CU.get_top_DIE()
        logging.debug('    Top DIE with tag=%s' % top_DIE.tag)

        path = top_DIE.get_full_path()
        # We're interested in the filename...
        logging.debug('    name=%s' % path)

        if path.endswith('__TI_internal'):
            logging.debug('__TI_internal found, terminating DWARF parsing')
            break

        # Collect DIEs recursively starting with top_DIE
        die_info_rec(top_DIE, objects=objects)

    # this is yucky but the embedded system is weird with two bytes
    # per address and even sizeof() responds in units of addressable units
    # rather than actual bytes
    byte_size_fudge = 1

    # DIE offset -> converted object, used to resolve type references
    offsets = {}

    def register(die, item):
        # Index the converted object by its DIE offset and trace it
        offsets[die.offset] = item
        logging.debug('{: 10d} {}'.format(die.offset, item))

    for die in objects['DW_TAG_base_type']:
        register(die, Type(
            name=die.attributes['DW_AT_name'].value.decode('utf-8'),
            bytes=die.attributes['DW_AT_byte_size'].value * byte_size_fudge,
            format=TypeFormats(die.attributes['DW_AT_encoding'].value)))

    variables = []
    for die in objects['DW_TAG_variable']:
        location = _attribute_value_or_none(die.attributes, 'DW_AT_location')
        # TODO: check this better
        # Only five-element locations are handled: element 0 is skipped
        # and elements 1..4 are read as a little-endian address.
        if location is None or len(location) != 5:
            continue
        address = int.from_bytes(bytes(location[1:5]), 'little')

        variable = Variable(
            name=die.attributes['DW_AT_name'].value.decode('utf-8'),
            type=die.attributes['DW_AT_type'].value,
            address=address,
            file=get_die_path(die))
        variables.append(variable)
        register(die, variable)

    for die in objects['DW_TAG_lo_user']:
        register(die, LoUser(type=die.attributes['DW_AT_type'].value))

    for die in objects['DW_TAG_hi_user']:
        register(die, HiUser(type=die.attributes['DW_AT_type'].value))

    subroutine_types = []
    for die in objects['DW_TAG_subroutine_type']:
        subroutine_type = SubroutineType(
            name=_decoded_name_or_none(die),
            return_type=_attribute_value_or_none(die.attributes,
                                                 'DW_AT_type'))
        for parameter in die.iter_children():
            subroutine_type.parameters.append(
                parameter.attributes['DW_AT_type'].value)
        subroutine_types.append(subroutine_type)
        register(die, subroutine_type)

    for die in objects['DW_TAG_unspecified_type']:
        register(die, UnspecifiedType(name=_decoded_name_or_none(die)))

    for die in objects['DW_TAG_pointer_type']:
        target = die.attributes['DW_AT_type'].value
        name = _decoded_name_or_none(die)
        # Leave PointerType's own default name in place when the DIE is
        # unnamed
        if name is not None:
            pointer_type = PointerType(name=name, type=target)
        else:
            pointer_type = PointerType(type=target)
        register(die, pointer_type)

    for die in objects['DW_TAG_volatile_type']:
        register(die, VolatileType(
            name=_decoded_name_or_none(die),
            type=die.attributes['DW_AT_type'].value))

    for die in objects['DW_TAG_array_type']:
        register(die, ArrayType(
            name=_decoded_name_or_none(die),
            bytes=_attribute_value_or_none(die.attributes,
                                           'DW_AT_byte_size'),
            type=die.attributes['DW_AT_type'].value))

    for die in objects['DW_TAG_const_type']:
        register(die, ConstType(
            name=_decoded_name_or_none(die),
            type=die.attributes['DW_AT_type'].value))

    for die in objects['DW_TAG_restrict_type']:
        register(die, RestrictType(
            name=_decoded_name_or_none(die),
            type=die.attributes['DW_AT_type'].value))

    structure_types = []
    for die in objects['DW_TAG_structure_type']:
        name = _decoded_name_or_none(die)
        byte_size_attribute = die.attributes.get('DW_AT_byte_size')
        if byte_size_attribute is None:
            print(
                'Skipping DW_TAG_structure_type due to lack of '
                'DW_AT_byte_size', name)
            continue
        structure = Struct(name=name, bytes=byte_size_attribute.value)
        structure_types.append(structure)
        offsets[die.offset] = structure
        for member_die in die.iter_children():
            a = member_die.attributes
            member_name = a['DW_AT_name'].value.decode('utf-8')
            structure.members[member_name] = StructMember(
                name=member_name,
                type=a['DW_AT_type'].value,
                # TODO: location[1] is just based on observation
                location=a['DW_AT_data_member_location'].value[1],
                bit_offset=_attribute_value_or_none(a, 'DW_AT_bit_offset'),
                bit_size=_attribute_value_or_none(a, 'DW_AT_bit_size'))
        logging.debug(list(die.iter_children()))
        logging.debug('{: 10d} {}'.format(die.offset, structure))

    union_types = []
    for die in objects['DW_TAG_union_type']:
        name = _decoded_name_or_none(die)
        byte_size_attribute = die.attributes.get('DW_AT_byte_size')
        if byte_size_attribute is None:
            print(
                'Skipping DW_TAG_union_type due to lack of '
                'DW_AT_byte_size', name)
            continue

        members = collections.OrderedDict(((
            member.attributes['DW_AT_name'].value.decode('utf-8'),
            UnionMember(
                name=member.attributes['DW_AT_name'].value.decode('utf-8'),
                type=member.attributes.get('DW_AT_type').value,
            ),
        ) for member in die.iter_children()))

        union = Union(
            name=name,
            bytes=byte_size_attribute.value,
            members=members,
        )
        union_types.append(union)
        register(die, union)

    for die in objects['DW_TAG_ptr_to_member_type']:
        register(die, PointerToMember(name=_decoded_name_or_none(die)))

    for die in objects['DW_TAG_enumeration_type']:
        enumeration = EnumerationType(
            name=_decoded_name_or_none(die),
            bytes=die.attributes['DW_AT_byte_size'].value * byte_size_fudge,
            type=_attribute_value_or_none(die.attributes, 'DW_AT_type'))
        for value in die.iter_children():
            enumeration.values.append(
                EnumerationValue(
                    name=value.attributes['DW_AT_name'].value.decode('utf-8'),
                    value=value.attributes['DW_AT_const_value'].value))
        register(die, enumeration)

    typedefs = []
    for die in objects['DW_TAG_typedef']:
        # The type is temporarily stored as (own offset, target offset);
        # it is replaced by the resolved object further down.
        typedef = TypeDef(
            name=die.attributes['DW_AT_name'].value.decode('utf-8'),
            type=(die.offset, die.attributes['DW_AT_type'].value))
        typedefs.append(typedef)
        offsets[die.offset] = typedef

    offset_values = sorted(offsets.keys())
    logging.debug(len(offset_values))
    logging.debug(offset_values)

    # Resolve typedef targets; unresolved ones keep their tuple and are
    # only counted.
    fails = 0
    for typedef in typedefs:
        offset = typedef.type[0]
        try:
            typedef.type = offsets[typedef.type[1]]
        except KeyError:
            logging.debug('Failed to find type for {}'.format(typedef))
            fails += 1
        else:
            logging.debug('{: 10d} {}'.format(offset, typedef))
    logging.debug(fails)

    for structure in structure_types:
        for member in structure.members.values():
            member.type = offsets[member.type]

    for union in union_types:
        for member in union.members.values():
            member.type = offsets[member.type]

    # Replace remaining integer type offsets by the objects they refer to.
    # A lookup failure triggers another pass; once 10 passes have been
    # made, the KeyError is re-raised.
    passes = 0
    while True:
        logging.debug('Starting pass {}'.format(passes))
        pass_again = False
        for item in subroutine_types:
            if isinstance(item.return_type, int):
                item.return_type = offsets[item.return_type]
            for i, parameter in enumerate(item.parameters):
                if isinstance(parameter, int):
                    item.parameters[i] = offsets[parameter]

        for item in offsets.values():
            if hasattr(item, 'type') and isinstance(item.type, int):
                try:
                    item.type = offsets[item.type]
                except KeyError:
                    if passes >= 10:
                        logging.debug(item)
                        raise
                    pass_again = True

        passes += 1

        if not pass_again:
            break

    # Group objects by name. Unnamed objects are always kept; named ones
    # are dropped when they are modifiers, start with '$', or are
    # subroutine types.
    names = collections.defaultdict(list)
    for item in offsets.values():
        if not hasattr(item, 'name'):
            continue
        if item.name is not None and (
                is_modifier(item)
                or item.name.startswith('$')
                or isinstance(item, SubroutineType)):
            continue
        names[item.name].append(item)

    # NOTE(review): bits_per_byte is not assigned in this function; it is
    # presumably a module-level name - confirm it exists.
    result = names, variables, bits_per_byte

    logging.debug('Finished processing file: {}'.format(filename))

    return result