def test_main(self):
        """Parse three raw section dumps (ARM64 target, DWARF v2) and verify
        that pyelftools distinguishes the 8-byte target address size from the
        32-bit DWARF offset (inter-DIE reference) size when parsing DIEs.
        """
        testfile_dir = os.path.join('test', 'testfiles_for_unittests')

        def read_section(suffix):
            # Read one saved section dump as a bytestring.
            path = os.path.join(testfile_dir,
                                'arm64_on_dwarfv2.%s.dat' % suffix)
            with open(path, 'rb') as f:
                return f.read()

        info = read_section('info')
        abbrev = read_section('abbrev')
        # Renamed from `str` so the builtin is not shadowed.
        str_section = read_section('str')

        # Parse the DWARF info
        di = DWARFInfo(
            config=DwarfConfig(little_endian=True,
                               default_address_size=8,
                               machine_arch="ARM64"),
            debug_info_sec=DebugSectionDescriptor(io.BytesIO(info),
                                                  '__debug_info', None,
                                                  len(info), 0),
            debug_aranges_sec=None,
            debug_abbrev_sec=DebugSectionDescriptor(io.BytesIO(abbrev),
                                                    '__debug_abbrev', None,
                                                    len(abbrev), 0),
            debug_frame_sec=None,
            eh_frame_sec=None,
            debug_str_sec=DebugSectionDescriptor(io.BytesIO(str_section),
                                                 '__debug_str', None,
                                                 len(str_section), 0),
            debug_loc_sec=None,
            debug_ranges_sec=None,
            debug_line_sec=None,
            debug_pubtypes_sec=None,
            debug_pubnames_sec=None,
            debug_addr_sec=None,
            debug_str_offsets_sec=None,
            debug_line_str_sec=None,
        )

        CUs = list(di.iter_CUs())
        # Locate a CU that is known to have a reference in DW_FORM_ref_addr form
        CU = CUs[21]
        self.assertEqual(CU['version'], 2)
        # Make sure pyelftools appreciates the difference between the target
        # address size and the DWARF inter-DIE offset size
        self.assertEqual(CU.structs.dwarf_format, 32)
        self.assertEqual(CU['address_size'], 8)
        DIEs = list(CU.iter_DIEs())
        # Before the patch, DIE #2 was misparsed, the current offset went off,
        # and the rest were misparsed too
        self.assertEqual(len(DIEs), 15)
# Example 2 (scraped sample separator; original text: "Ejemplo n.º 2" / "0")
def read_macho(filename, resolve_arch, friendly_filename):
    """Load DWARF data from a Mach-O binary (thin or fat).

    Args:
        filename: path to the Mach-O file.
        resolve_arch: callback taking a list of slice/arch names and returning
            the index of the chosen slice, or None on user cancellation.
        friendly_filename: display name for the file (only referenced by the
            commented-out debug dump below).

    Returns:
        A DWARFInfo on success, None if there is no __debug_info section,
        or False if the user cancelled the fat-slice selection.
    """
    from filebytes.mach_o import MachO, CpuType, TypeFlags, LC
    fat_arch = None
    macho = MachO(filename)
    if macho.isFat:
        slices = [make_macho_arch_name(slice) for slice in macho.fatArches]
        arch_no = resolve_arch(slices)
        if arch_no is None: # User cancellation
            return False
        fat_arch = slices[arch_no]
        macho = macho.fatArches[arch_no]

    # We proceed with macho being a arch-specific file, or a slice within a fat binary
    data = {
        section.name: DebugSectionDescriptor(io.BytesIO(section.bytes), section.name, None, len(section.bytes), 0)
        for cmd in macho.loadCommands
        if cmd.header.cmd in (LC.SEGMENT, LC.SEGMENT_64)
        for section in cmd.sections
        if section.name.startswith('__debug')
    }

    #macho_save_sections(friendly_filename, macho)

    if '__debug_info' not in data:
        return None

    cpu = macho.machHeader.header.cputype
    di = DWARFInfo(
        config = DwarfConfig(
            little_endian=True,
            default_address_size = 8 if (cpu & TypeFlags.ABI64) != 0 else 4,
            machine_arch = make_macho_arch_name(macho)
        ),
        debug_info_sec = data['__debug_info'],
        debug_aranges_sec = data.get('__debug_aranges'),
        debug_abbrev_sec = data['__debug_abbrev'],
        debug_frame_sec = data.get('__debug_frame'),
        eh_frame_sec = None, # Haven't seen those in Mach-O
        debug_str_sec = data['__debug_str'],
        debug_loc_sec = data.get('__debug_loc'),
        debug_ranges_sec = data.get('__debug_ranges'),
        debug_line_sec = data.get('__debug_line'),
        debug_pubtypes_sec = data.get('__debug_pubtypes'), #__debug_gnu_pubn?
        # Bug fix: was data.get('__debug_pubtypes') — a copy-paste of the
        # line above that handed the pubTYPES section to pubNAMES.
        debug_pubnames_sec = data.get('__debug_pubnames'), #__debug_gnu_pubt?
    )
    di._format = 1
    di._fat_arch = fat_arch
    text_cmd = next((cmd for cmd in macho.loadCommands if cmd.header.cmd in (LC.SEGMENT, LC.SEGMENT_64) and cmd.name == "__TEXT"), False)
    di._start_address = text_cmd.header.vmaddr if text_cmd else 0
    return di
# Example 3 (scraped sample separator; original text: "Ejemplo n.º 3" / "0")
def read_pe(filename):
    """Load DWARF data from a PE binary.

    Args:
        filename: path to the PE file.

    Returns:
        A DWARFInfo on success, or None if there is no .debug_info section.
    """
    from filebytes.pe import PE, IMAGE_FILE_MACHINE

    pefile = PE(filename)

    # Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28
    sections = [(section.name, section,
        section.header.PhysicalAddress_or_VirtualSize,
        section.header.SizeOfRawData)
        for section in pefile.sections
        if section.name.startswith('.debug')]

    # Use the smaller of virtual/raw size (unless virtual size is 0) to trim
    # the raw-data padding mentioned above.
    data = {name: DebugSectionDescriptor(io.BytesIO(section.bytes), name, None,
            raw_size if virtual_size == 0 else min(raw_size, virtual_size), 0)
        for (name, section, virtual_size, raw_size) in sections}

    if '.debug_info' not in data:
        return None

    machine = pefile.imageNtHeaders.header.FileHeader.Machine
    is64 = machine in (IMAGE_FILE_MACHINE.AMD64, IMAGE_FILE_MACHINE.ARM64, IMAGE_FILE_MACHINE.IA64) # There are also some exotic architectures...
    di = DWARFInfo(
        config = DwarfConfig(
            little_endian = True,
            default_address_size = 8 if is64 else 4,
            machine_arch = IMAGE_FILE_MACHINE[machine].name
        ),
        debug_info_sec = data['.debug_info'],
        debug_aranges_sec = data.get('.debug_aranges'),
        debug_abbrev_sec = data.get('.debug_abbrev'),
        debug_frame_sec = data.get('.debug_frame'),
        eh_frame_sec = None, # Haven't seen one in the wild so far
        debug_str_sec = data.get('.debug_str'),
        debug_loc_sec = data.get('.debug_loc'),
        debug_ranges_sec = data.get('.debug_ranges'),
        debug_line_sec = data.get('.debug_line'),
        debug_pubtypes_sec = data.get('.debug_pubtypes'),
        debug_pubnames_sec = data.get('.debug_pubnames'),
    )
    di._format = 2
    return di
# Example 4 (scraped sample separator; original text: "Ejemplo n.º 4" / "0")
    def __init__(self, elffile):
        """Capture DWARF v1-style state from an ELF file.

        Copies the '.debug' section (and '.line', when present) into
        in-memory streams, and builds the DwarfConfig/DWARFStructs that
        match the file's endianness and ELF class.
        """
        debug_data = elffile.get_section_by_name(".debug").data()
        self.section_size = len(debug_data)
        # BytesIO(initial_bytes) starts positioned at offset 0.
        self.stm = BytesIO(debug_data)

        line_section = elffile.get_section_by_name(".line")
        if line_section:
            self.linestream = BytesIO(line_section.data())

        # elfclass is 32 or 64, so dividing by 8 yields the address size.
        self.config = DwarfConfig(
            little_endian=elffile.little_endian,
            default_address_size=elffile.elfclass // 8,
            machine_arch=elffile.get_machine_arch())

        self.structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=32,
            address_size=self.config.default_address_size)
# Example 5 (scraped sample separator; original text: "Ejemplo n.º 5" / "0")
def process_file(filename):
    """Extract symbol and type information from the DWARF data in a COFF file.

    Loads ``filename`` with ``epyqlib.ticoff.Coff``, hands its ``.debug_*``
    sections to pyelftools' ``DWARFInfo``, gathers DIEs by tag (via
    ``die_info_rec``, defined elsewhere in this module), and then resolves
    the DW_AT_type DIE-offset references into linked objects (``Type``,
    ``Struct``, ``Union``, ... — also declared elsewhere in this module).

    Returns:
        tuple: ``(names, variables, bits_per_byte)`` — a name -> list-of-items
        mapping, the list of parsed ``Variable`` objects, and the module-level
        ``bits_per_byte`` value (defined elsewhere — TODO confirm its source).
    """
    logging.debug('Processing file: {}'.format(filename))
    logging.debug('Working directory: {}'.format(os.getcwd()))

    coff = epyqlib.ticoff.Coff()
    coff.from_file(filename)

    # Wrap each .debug_* section's raw bytes in a stream for pyelftools.
    section_bytes = {
        s.name: (io.BytesIO(s.data), len(s.data))
        for s in coff.sections if s.name.startswith('.debug_')
    }
    debug_sections = {
        name: DebugSectionDescriptor(stream=stream,
                                     name=name,
                                     global_offset=0,
                                     size=length)
        for name, (stream, length) in section_bytes.items()
    }

    from elftools.dwarf.dwarfinfo import DWARFInfo, DwarfConfig
    dwarfinfo = DWARFInfo(
        config=DwarfConfig(little_endian=True,
                           default_address_size=4,
                           machine_arch='<unknown>'),
        debug_info_sec=debug_sections.get('.debug_info', None),
        # debug_info_sec=DebugSectionDescriptor(
        #     stream=io.BytesIO(dwarf_debug_info_bytes),
        #     name='.debug_info',
        #     global_offset=0,
        #     size=len(dwarf_debug_info_bytes)),
        debug_aranges_sec=debug_sections.get('.debug_aranges', None),
        debug_abbrev_sec=debug_sections.get('.debug_abbrev', None),
        debug_frame_sec=debug_sections.get('.debug_frame', None),
        # TODO(eliben): reading of eh_frame is not hooked up yet
        eh_frame_sec=None,
        debug_str_sec=debug_sections.get('.debug_str', None),
        debug_loc_sec=debug_sections.get('.debug_loc', None),
        debug_ranges_sec=debug_sections.get('.debug_ranges', None),
        debug_line_sec=debug_sections.get('.debug_line', None))

    # Buckets for the DIE tags of interest; filled by die_info_rec() below.
    # NOTE(review): 'DW_AT_encoding' is an attribute, not a tag, and its
    # bucket is never read in this function.
    objects = collections.OrderedDict((tag, []) for tag in [
        'DW_TAG_subprogram', 'DW_TAG_variable', 'DW_TAG_typedef',
        'DW_TAG_base_type', 'DW_AT_encoding', 'DW_TAG_structure_type',
        'DW_TAG_union_type', 'DW_TAG_ptr_to_member_type',
        'DW_TAG_enumeration_type', 'DW_TAG_pointer_type', 'DW_TAG_array_type',
        'DW_TAG_volatile_type', 'DW_TAG_const_type', 'DW_TAG_restrict_type',
        'DW_TAG_lo_user', 'DW_TAG_hi_user', 'DW_TAG_unspecified_type',
        'DW_TAG_subroutine_type'
    ])

    for CU in dwarfinfo.iter_CUs():
        # it = dwarfinfo.iter_CUs()
        # while True:
        #     try:
        #         CU = next(it)
        #     except StopIteration:
        #         break
        #     except elftools.common.exceptions.DWARFError:
        #         traceback.print_exc()
        #         logging.debug('Skipping current CU')
        #         next

        # DWARFInfo allows to iterate over the compile units contained in
        # the .debug_info section. CU is a CompileUnit object, with some
        # computed attributes (such as its offset in the section) and
        # a header which conforms to the DWARF standard. The access to
        # header elements is, as usual, via item-lookup.
        logging.debug('  Found a compile unit at offset %s, length %s' %
                      (CU.cu_offset, CU['unit_length']))

        # Start with the top DIE, the root for this CU's DIE tree
        top_DIE = CU.get_top_DIE()
        logging.debug('    Top DIE with tag=%s' % top_DIE.tag)

        path = top_DIE.get_full_path()
        # We're interested in the filename...
        logging.debug('    name=%s' % path)

        if path.endswith('__TI_internal'):
            logging.debug('__TI_internal found, terminating DWARF parsing')
            break
        else:
            # Display DIEs recursively starting with top_DIE
            die_info_rec(top_DIE, objects=objects)
            # pass

    # NOTE(review): this nested helper is defined but never called anywhere
    # in this function — apparently dead debugging code.
    def die_info_rec_structure_type(die, indent_level):
        for child in die.iter_children():
            # logging.debug(indent_level + str(child.attributes['DW_AT_name'].value.decode('utf-8')))
            location = str(
                child.attributes['DW_AT_data_member_location'].value)
            name = str(child.attributes['DW_AT_name'].value.decode('utf-8'))
            logging.debug(indent_level + name + ': ' + location)
            # logging.debug(indent_level + str(child.attributes['DW_AT_name'].value.decode('utf-8')) + ': ' + str(child.attributes['DW_AT_data_member_location'].value.decode('utf-u')))

    # this is yucky but the embedded system is weird with two bytes
    # per address and even sizeof() responds in units of addressable units
    # rather than actual bytes
    byte_size_fudge = 1

    # Maps DIE offset -> parsed object so that DW_AT_type attribute values
    # (which are DIE offsets) can be resolved into object references below.
    offsets = {}

    types = []
    for die in objects['DW_TAG_base_type']:
        type = Type(name=die.attributes['DW_AT_name'].value.decode('utf-8'),
                    bytes=die.attributes['DW_AT_byte_size'].value *
                    byte_size_fudge,
                    format=TypeFormats(die.attributes['DW_AT_encoding'].value))
        types.append(type)
        offsets[die.offset] = type
        logging.debug('{: 10d} {}'.format(die.offset, type))

    variables = []
    for die in objects['DW_TAG_variable']:
        location = die.attributes.get('DW_AT_location', [])
        if location:
            location = location.value

        # TODO: check this better
        # Only 5-byte location expressions are handled; bytes 1..4 are read
        # as a little-endian address — presumably DW_OP_addr; verify.
        if len(location) != 5:
            continue
        address = int.from_bytes(bytes(location[1:5]), 'little')

        variable = Variable(
            name=die.attributes['DW_AT_name'].value.decode('utf-8'),
            type=die.attributes['DW_AT_type'].value,
            address=address,
            file=get_die_path(die))
        variables.append(variable)
        offsets[die.offset] = variable
        logging.debug('{: 10d} {}'.format(die.offset, variable))

    lo_users = []
    for die in objects['DW_TAG_lo_user']:
        # NOTE(review): `name` is decoded here but never passed to LoUser.
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        lo_user = LoUser(type=die.attributes['DW_AT_type'].value)
        lo_users.append(lo_user)
        offsets[die.offset] = lo_user
        logging.debug('{: 10d} {}'.format(die.offset, lo_user))

    hi_users = []
    for die in objects['DW_TAG_hi_user']:
        # NOTE(review): as above, `name` is decoded but unused.
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        hi_user = HiUser(type=die.attributes['DW_AT_type'].value)
        hi_users.append(hi_user)
        offsets[die.offset] = hi_user
        logging.debug('{: 10d} {}'.format(die.offset, hi_user))

    subroutine_types = []
    for die in objects['DW_TAG_subroutine_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        type = die.attributes.get('DW_AT_type', None)
        if type is not None:
            type = type.value
        subroutine_type = SubroutineType(name=name, return_type=type)
        # Child DIEs carry the parameters; store their type offsets for now.
        for parameter in die.iter_children():
            subroutine_type.parameters.append(
                parameter.attributes['DW_AT_type'].value)
        subroutine_types.append(subroutine_type)
        offsets[die.offset] = subroutine_type
        logging.debug('{: 10d} {}'.format(die.offset, subroutine_type))

    unspecified_types = []
    for die in objects['DW_TAG_unspecified_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        unspecified_type = UnspecifiedType(name=name)
        unspecified_types.append(unspecified_type)
        offsets[die.offset] = unspecified_type
        logging.debug('{: 10d} {}'.format(die.offset, unspecified_type))

    pointer_types = []
    for die in objects['DW_TAG_pointer_type']:
        type = die.attributes['DW_AT_type'].value
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
            pointer_type = PointerType(name=name, type=type)
        else:
            pointer_type = PointerType(type=type)
        pointer_types.append(pointer_type)
        offsets[die.offset] = pointer_type
        logging.debug('{: 10d} {}'.format(die.offset, pointer_type))

    volatile_types = []
    for die in objects['DW_TAG_volatile_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        volatile_type = VolatileType(name=name,
                                     type=die.attributes['DW_AT_type'].value)
        volatile_types.append(volatile_type)
        offsets[die.offset] = volatile_type
        logging.debug('{: 10d} {}'.format(die.offset, volatile_type))

    array_types = []
    for die in objects['DW_TAG_array_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        byte_size = die.attributes.get('DW_AT_byte_size', None)
        if byte_size is not None:
            byte_size = byte_size.value
        array_type = ArrayType(name=name,
                               bytes=byte_size,
                               type=die.attributes['DW_AT_type'].value)
        array_types.append(array_type)
        offsets[die.offset] = array_type
        logging.debug('{: 10d} {}'.format(die.offset, array_type))

    const_types = []
    for die in objects['DW_TAG_const_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        const_type = ConstType(name=name,
                               type=die.attributes['DW_AT_type'].value)
        const_types.append(const_type)
        offsets[die.offset] = const_type
        logging.debug('{: 10d} {}'.format(die.offset, const_type))

    restrict_types = []
    for die in objects['DW_TAG_restrict_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        restrict_type = RestrictType(name=name,
                                     type=die.attributes['DW_AT_type'].value)
        restrict_types.append(restrict_type)
        offsets[die.offset] = restrict_type
        logging.debug('{: 10d} {}'.format(die.offset, restrict_type))

    structure_types = []
    for die in objects['DW_TAG_structure_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        # Structures with no byte size cannot be represented; skip them.
        byte_size_attribute = die.attributes.get('DW_AT_byte_size')
        if byte_size_attribute is None:
            print(
                'Skipping DW_TAG_structure_type due to lack of '
                'DW_AT_byte_size', name)
            continue
        struct = Struct(name=name, bytes=byte_size_attribute.value)
        structure_types.append(struct)
        offsets[die.offset] = struct
        for member_die in die.iter_children():
            a = member_die.attributes
            bit_offset = a.get('DW_AT_bit_offset', None)
            if bit_offset is not None:
                bit_offset = bit_offset.value
            bit_size = a.get('DW_AT_bit_size', None)
            if bit_size is not None:
                bit_size = bit_size.value
            # TODO: location[1] is just based on observation
            name = a['DW_AT_name'].value.decode('utf-8')
            struct.members[name] = StructMember(
                name=name,
                type=a['DW_AT_type'].value,
                location=a['DW_AT_data_member_location'].value[1],
                bit_offset=bit_offset,
                bit_size=bit_size)
        logging.debug(list(die.iter_children()))
        logging.debug('{: 10d} {}'.format(die.offset, struct))

    union_types = []
    for die in objects['DW_TAG_union_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        # As with structures, a union without a byte size is skipped.
        byte_size_attribute = die.attributes.get('DW_AT_byte_size')
        if byte_size_attribute is None:
            print(
                'Skipping DW_TAG_union_type due to lack of '
                'DW_AT_byte_size', name)
            continue

        members = collections.OrderedDict(((
            member.attributes['DW_AT_name'].value.decode('utf-8'),
            UnionMember(
                name=member.attributes['DW_AT_name'].value.decode('utf-8'),
                type=member.attributes.get('DW_AT_type').value,
            ),
        ) for member in die.iter_children()))

        union = Union(
            name=name,
            bytes=byte_size_attribute.value,
            members=members,
        )
        union_types.append(union)
        offsets[die.offset] = union
        logging.debug('{: 10d} {}'.format(die.offset, union))

    pointer_to_member_types = []
    for die in objects['DW_TAG_ptr_to_member_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        pointer_to_member = PointerToMember(name=name)
        pointer_to_member_types.append(pointer_to_member)
        offsets[die.offset] = pointer_to_member
        logging.debug('{: 10d} {}'.format(die.offset, pointer_to_member))

    enumeration_types = []
    for die in objects['DW_TAG_enumeration_type']:
        name = die.attributes.get('DW_AT_name', None)
        if name is not None:
            name = name.value.decode('utf-8')
        type = die.attributes.get('DW_AT_type', None)
        if type is not None:
            type = type.value
        enumeration = EnumerationType(
            name=name,
            bytes=die.attributes['DW_AT_byte_size'].value * byte_size_fudge,
            type=type)
        # Each child DIE is an enumerator: name plus constant value.
        for value in die.iter_children():
            enumeration.values.append(
                EnumerationValue(
                    name=value.attributes['DW_AT_name'].value.decode('utf-8'),
                    value=value.attributes['DW_AT_const_value'].value))
        enumeration_types.append(enumeration)
        offsets[die.offset] = enumeration
        logging.debug('{: 10d} {}'.format(die.offset, enumeration))

    typedefs = []
    for die in objects['DW_TAG_typedef']:
        # Keep the typedef's own offset alongside its target offset; the
        # target is swapped for the resolved object below.
        typedef = TypeDef(
            name=die.attributes['DW_AT_name'].value.decode('utf-8'),
            type=(die.offset, die.attributes['DW_AT_type'].value))
        typedefs.append(typedef)
        offsets[die.offset] = typedef

    offset_values = sorted(offsets.keys())
    logging.debug(len(offset_values))
    logging.debug(offset_values)
    # Resolve each typedef's target offset to a parsed object, counting the
    # offsets that were never registered above.
    fails = 0
    for typedef in typedefs:
        offset = typedef.type[0]
        try:
            typedef.type = offsets[typedef.type[1]]
        except KeyError:
            logging.debug('Failed to find type for {}'.format(typedef))
            fails += 1
        else:
            logging.debug('{: 10d} {}'.format(offset, typedef))
    logging.debug(fails)

    # Member types are resolved unconditionally — a missing offset raises.
    for structure in structure_types:
        for member in structure.members.values():
            member.type = offsets[member.type]

    for union in union_types:
        for member in union.members.values():
            member.type = offsets[member.type]

    # Fixpoint loop: keep swapping integer type references for objects until
    # a pass resolves everything; after 10 passes an unresolved offset is
    # re-raised as a hard error.
    passes = 0
    while True:
        logging.debug('Starting pass {}'.format(passes))
        pass_again = False
        for item in subroutine_types:
            if isinstance(item.return_type, int):
                item.return_type = offsets[item.return_type]
            for i, parameter in enumerate(item.parameters):
                if isinstance(parameter, int):
                    item.parameters[i] = offsets[parameter]

        for item in offsets.values():
            if hasattr(item, 'type') and isinstance(item.type, int):
                try:
                    item.type = offsets[item.type]
                except KeyError:
                    if passes >= 10:
                        logging.debug(item)
                        raise
                    pass_again = True

        passes += 1

        if not pass_again:
            break

    # for pointer_type in pointer_types:
    #     logging.debug(pointer_type)
    #     pointer_type.type = offsets[pointer_type.type]
    #     logging.debug(pointer_type)
    #
    # for array_type in array_types:
    #     logging.debug(array_type)
    #     array_type.type = offsets[array_type.type]
    #     logging.debug(array_type)
    #
    # for volatile_type in volatile_types:
    #     logging.debug(volatile_type)
    #     volatile_type.type = offsets[volatile_type.type]
    #     logging.debug(volatile_type)

    # Index the parsed items by name. Modifier types (per is_modifier,
    # defined elsewhere), '$'-prefixed names, and subroutine types are
    # excluded; unnamed items are kept under the key None.
    names = collections.defaultdict(list)
    for item in offsets.values():
        if hasattr(item, 'name'):
            valid = False
            if item.name is None:
                valid = True
            elif is_modifier(item):
                pass
            elif item.name.startswith('$'):
                pass
            elif isinstance(item, SubroutineType):
                pass
            else:
                valid = True

            if valid:
                names[item.name].append(item)

    result = names, variables, bits_per_byte

    logging.debug('Finished processing file: {}'.format(filename))

    return result