Ejemplo n.º 1
0
    def __init__(self, fp, **kwargs):
        """Load the ELF image from ``fp`` and set up the DWARF helpers.

        :param fp: file object handed to ``ELFFile``.
        :param kwargs: accepted but not used by this constructor.
        """
        super().__init__()

        # Keep local aliases so the dependent helpers below read naturally.
        self.elf = elf = ELFFile(fp)
        self.dwarf = dwarf = elf.get_dwarf_info()

        # Parser for DWARF expressions, built from the DWARF structs.
        self.expr_parser = DWARFExprParser(dwarf.structs)
        # NOTE: despite the attribute name, this holds whatever
        # ``location_lists()`` returns, not a parser object.
        self.loc_parser = dwarf.location_lists()
        self.arch = ELF.extract_arch(elf)
Ejemplo n.º 2
0
    def test_single(self):
        """Parse one-operation expressions and check the decoded result."""
        p = DWARFExprParser(self.structs32)

        # Opcode without operands.
        lst = p.parse_expr([0x1b])
        self.assertEqual(lst,
                         [DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])])

        # Opcode followed by a single operand byte.
        lst = p.parse_expr([0x90, 16])
        self.assertEqual(
            lst, [DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])])

        lst = p.parse_expr([0xe0])
        self.assertEqual(len(lst), 1)
        # 0xe0 is the value of both DW_OP_GNU_push_tls_address and
        # DW_OP_lo_user, so the opcode -> name lookup may yield either one.
        # Accept both to keep the test deterministic.
        self.assertIn(lst[0], [
            DWARFExprOp(op=0xe0, op_name='DW_OP_GNU_push_tls_address',
                        args=[]),
            DWARFExprOp(op=0xe0, op_name='DW_OP_lo_user', args=[])
        ])
Ejemplo n.º 3
0
    def dump_expr(self, expr):
        """Parse ``expr`` and return each operation formatted by ``format_op``.

        The expression parser is created on first use and stored on the
        compile unit (``cu._exprparser``) so later calls reuse it.
        """
        cu = self.die.cu
        if cu._exprparser is None:
            # Version 1 compile units get the dedicated V1 parser.
            if cu['version'] > 1:
                parser_cls = DWARFExprParser
            else:
                parser_cls = DWARFExprParserV1
            cu._exprparser = parser_cls(cu.structs)

        # Challenge: for nested expressions, args is a list with a list of
        # commands. For those, the format is: op {op arg, arg; op arg, arg}
        # Can't just check for iterable, str is iterable too.
        parsed_ops = cu._exprparser.parse_expr(expr)
        return [self.format_op(*op) for op in parsed_ops]
Ejemplo n.º 4
0
    def test_single(self):
        """Parse one-operation expressions and check the decoded result."""
        parser = DWARFExprParser(self.structs32)

        # Opcode without operands.
        expected_div = [DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])]
        self.assertEqual(parser.parse_expr([0x1b]), expected_div)

        # Opcode followed by a single operand byte.
        expected_regx = [
            DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])
        ]
        self.assertEqual(parser.parse_expr([0x90, 16]), expected_regx)

        # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so
        # either name is accepted to prevent non-determinism.
        acceptable = [
            DWARFExprOp(op=0xe0, op_name=name, args=[])
            for name in ('DW_OP_GNU_push_tls_address', 'DW_OP_lo_user')
        ]
        parsed = parser.parse_expr([0xe0])
        self.assertEqual(len(parsed), 1)
        self.assertIn(parsed[0], acceptable)
Ejemplo n.º 5
0
    def test_single(self):
        """Check decoded ops (including byte offsets) for several expressions."""
        parser = DWARFExprParser(self.structs32)

        def make_ops(rows):
            # rows: (opcode, name, args, offset) tuples -> DWARFExprOp list.
            return [
                DWARFExprOp(op=code, op_name=name, args=args, offset=offset)
                for code, name, args, offset in rows
            ]

        # Opcode without operands.
        self.assertEqual(parser.parse_expr([0x1b]),
                         make_ops([(0x1B, 'DW_OP_div', [], 0)]))

        # Opcode followed by a single operand byte.
        self.assertEqual(parser.parse_expr([0x90, 16]),
                         make_ops([(0x90, 'DW_OP_regx', [16], 0)]))

        # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so
        # either name is accepted to prevent non-determinism.
        tls_ops = parser.parse_expr([0xe0])
        self.assertEqual(len(tls_ops), 1)
        self.assertIn(tls_ops[0], make_ops([
            (0xe0, 'DW_OP_GNU_push_tls_address', [], 0),
            (0xe0, 'DW_OP_lo_user', [], 0),
        ]))

        # Real life example:
        #   push_object_address
        #   deref
        #   dup
        #   bra 4
        #   lit0
        #   skip 3
        #   lit4
        #   minus
        #   deref
        encoded = [
            0x97, 0x6, 0x12, 0x28, 0x4, 0x0, 0x30, 0x2F, 0x3, 0x0, 0x34, 0x1C,
            0x6
        ]
        # bra and skip each carry a two-byte operand, hence the offset jumps.
        expected = make_ops([
            (0x97, 'DW_OP_push_object_address', [], 0),
            (0x6, 'DW_OP_deref', [], 1),
            (0x12, 'DW_OP_dup', [], 2),
            (0x28, 'DW_OP_bra', [4], 3),
            (0x30, 'DW_OP_lit0', [], 6),
            (0x2f, 'DW_OP_skip', [3], 7),
            (0x34, 'DW_OP_lit4', [], 10),
            (0x1c, 'DW_OP_minus', [], 11),
            (0x6, 'DW_OP_deref', [], 12),
        ])
        decoded = parser.parse_expr(encoded)
        self.assertEqual(len(decoded), 9)
        self.assertEqual(decoded, expected)
Ejemplo n.º 6
0
def get_var_offset(filename, function_name, var_name):
    """Return the DW_OP_fbreg offset of a variable inside a function.

    Scans every compile unit of the ELF file for ``DW_TAG_subprogram`` DIEs
    named ``function_name`` and, among their direct children, for a
    ``DW_TAG_variable`` / ``DW_TAG_formal_parameter`` named ``var_name``.

    :param filename: path of the ELF file to inspect.
    :param function_name: value to compare against the function's DW_AT_name.
    :param var_name: value to compare against the variable's DW_AT_name.
    :return: the first argument of the variable's ``DW_OP_fbreg`` operation
        (the last such operation wins if there are several); ``0`` when the
        variable exists but its location is not a plain expression or has no
        ``DW_OP_fbreg``; ``None`` when the file has no DWARF info (a message
        is printed) or no matching function/variable is found.
    """
    variable_tags = ("DW_TAG_variable", "DW_TAG_formal_parameter")

    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return None

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        # One expression parser is enough for the whole file.
        expr_parser = DWARFExprParser(dwarfinfo.structs)

        for cu in dwarfinfo.iter_CUs():
            for die in cu.iter_DIEs():
                if die.tag != "DW_TAG_subprogram":
                    continue

                # A subprogram without DW_AT_name is treated as named "".
                name_attr = die.attributes.get("DW_AT_name")
                fname = name_attr.value if name_attr is not None else ""
                if fname != function_name:
                    continue

                for child in die.iter_children():
                    if child.tag not in variable_tags:
                        continue
                    child_name = child.attributes.get("DW_AT_name")
                    if child_name is None or child_name.value != var_name:
                        continue

                    # Found the variable: decode its location, if any.
                    location = 0
                    loc_attr = child.attributes.get("DW_AT_location")
                    if loc_attr is not None:
                        loc = loc_parser.parse_from_attribute(
                            loc_attr, cu['version'])
                        if isinstance(loc, LocationExpr):
                            for op in expr_parser.parse_expr(loc.loc_expr):
                                if op.op_name == 'DW_OP_fbreg':
                                    location = op.args[0]
                    return location

    return None
Ejemplo n.º 7
0
class ReStructurer(DWARFStructurer):
    """DWARFStructurer implementation that reads DWARF info from an ELF file.

    ``unit_*`` methods take a compile unit as handler; the remaining query
    methods take a DIE. ``type_*`` methods that check a tag return ``None``
    when the handler is not of the expected kind (the handler may be any
    object, including one without a ``tag`` attribute).
    """

    def __init__(self, fp, **kwargs):
        """Load the ELF image from ``fp`` and set up the DWARF helpers.

        :param fp: file object handed to ``ELFFile``.
        :param kwargs: accepted but not used by this constructor.
        """
        super().__init__()

        self.elf = ELFFile(fp)
        self.dwarf = self.elf.get_dwarf_info()
        self.expr_parser = DWARFExprParser(self.dwarf.structs)
        # NOTE: despite the name this is the location-lists object returned
        # by ``location_lists()``.
        self.loc_parser = self.dwarf.location_lists()
        self.arch = ELF.extract_arch(self.elf)

    @classmethod
    def rewrite_dwarf(cls, in_path, out_path, **kwargs):
        """Structure the DWARF of ``in_path`` and dump the result to ``out_path``.

        The input file is kept open only while the structurer runs; the
        serialization and dump happen afterwards.
        """
        with open(in_path, 'rb') as fp:
            structurer = cls(fp, **kwargs)
            structure = structurer.run()

        serial = serialize(structure, structurer.arch)
        dump_elf(serial, structurer.arch, out_path, in_path)

    # ------------------------------------------------------------------
    # Generic helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_reference(die, attr):
        """Return the DIE referenced by the reference-class attribute ``attr``.

        ``DW_FORM_ref_addr`` values are already offsets from the start of the
        debug info section; every other form is treated as relative to the
        compile unit and gets the CU offset added.
        """
        offset = attr.value
        if attr.form != 'DW_FORM_ref_addr':
            offset += die.cu.cu_offset
        # NOTE(review): the lookup goes through the DIE's own CU; a
        # DW_FORM_ref_addr pointing into another CU may need a lookup on the
        # DWARF info object instead - confirm against the library version.
        return die.cu.get_DIE_from_refaddr(offset)

    def _read_location_list(self, die, offset):
        """Decode the location list at ``offset`` into ``LocationEntry`` items.

        Begin/end offsets are made absolute using the current base address,
        which starts at the CU's ``DW_AT_low_pc`` (0 if absent) and is
        replaced whenever a base-address entry is encountered.
        """
        base_addr = 0
        low_pc = die.cu.get_top_DIE().attributes.get('DW_AT_low_pc', None)
        if low_pc is not None:
            base_addr = low_pc.value

        entries = []
        for item in self.loc_parser.get_location_list_at_offset(offset):
            if isinstance(item, locationlists.LocationEntry):
                entries.append(
                    LocationEntry(
                        base_addr + item.begin_offset,
                        base_addr + item.end_offset,
                        self.expr_parser.parse_expr(item.loc_expr)))
            elif isinstance(item, locationlists.BaseAddressEntry):
                base_addr = item.base_address
            else:
                raise TypeError("What kind of loclist entry is this?")
        return entries

    def get_attribute(self, die: DIE, name):
        """Return the decoded value of attribute ``name`` on ``die``.

        Decoding rules, checked in this order:

        * form ``DW_FORM_exprloc``: parsed list of expression operations;
        * ``DW_AT_location`` with form ``DW_FORM_sec_offset``: list of
          ``LocationEntry`` objects read from the location list;
        * form ``DW_FORM_addr``: the value wrapped in ``Address``;
        * ``DW_AT_type``: the referenced DIE;
        * anything else: the raw attribute value.

        :return: the decoded value, or ``None`` if the attribute is absent.
        """
        attr = die.attributes.get(name, None)
        if attr is None:
            return None
        if attr.form == 'DW_FORM_exprloc':
            return self.expr_parser.parse_expr(attr.value)
        if name == 'DW_AT_location' and attr.form == 'DW_FORM_sec_offset':
            return self._read_location_list(die, attr.value)
        if attr.form == 'DW_FORM_addr':
            return Address(attr.value)
        if name == 'DW_AT_type':
            return self._resolve_reference(die, attr)
        return attr.value

    @staticmethod
    def filter_children(die, tag):
        """Yield the direct children of ``die`` whose tag equals ``tag``."""
        for child in die.iter_children():
            if child.tag == tag:
                yield child

    def get_expression_attribute(self, die, tag):
        """Return attribute ``tag`` of ``die`` parsed as a DWARF expression.

        The raw attribute value is parsed exactly once. Going through
        ``get_attribute`` first would hand an already-parsed list to the
        parser for ``DW_FORM_exprloc`` attributes.

        :return: the parsed expression, or ``None`` if the attribute is absent.
        """
        attr = die.attributes.get(tag, None)
        if attr is None:
            return None
        return self.expr_parser.parse_expr(attr.value)

    def get_ranges(self, die):
        """Return the address ranges covered by ``die``.

        Uses ``DW_AT_ranges`` when present, otherwise the
        ``DW_AT_low_pc`` / ``DW_AT_high_pc`` pair. Returns an empty list when
        the DIE carries neither.

        :raises Exception: if only one of low_pc / high_pc is present.
        """
        attributes = die.attributes
        ranges = attributes.get('DW_AT_ranges', None)
        if ranges is not None:
            return self.dwarf.range_lists().get_range_list_at_offset(
                ranges.value)

        low_pc = attributes.get('DW_AT_low_pc', None)
        high_pc = attributes.get('DW_AT_high_pc', None)
        if low_pc is None and high_pc is None:
            return []
        if low_pc is None or high_pc is None:
            raise Exception(
                'Strange ranges - one but not both of low_pc + high_pc')

        # TODO base addresses???
        # high_pc is an absolute address only when encoded as DW_FORM_addr;
        # any other form is treated as an offset from low_pc.
        if high_pc.form == 'DW_FORM_addr':
            end = high_pc.value
        else:
            end = low_pc.value + high_pc.value
        return [RangeEntry(low_pc.value, end)]

    def get_abstract_origin(self, die):
        """Return the DIE named by ``DW_AT_abstract_origin``, or ``None``."""
        attr = die.attributes.get('DW_AT_abstract_origin', None)
        if attr is None:
            return None
        assert isinstance(attr.value, int)
        return self._resolve_reference(die, attr)

    @staticmethod
    def _has_tag(handler, tag):
        """Tell whether ``handler`` has a ``tag`` attribute equal to ``tag``."""
        return getattr(handler, 'tag', None) == tag

    def _attribute_if_tag(self, handler, tag, name):
        """Return attribute ``name`` if ``handler`` has tag ``tag``, else None."""
        if self._has_tag(handler, tag):
            return self.get_attribute(handler, name)
        return None

    def _children_if_tag(self, handler, tag, child_tag):
        """Return children tagged ``child_tag`` if ``handler`` has tag ``tag``."""
        if self._has_tag(handler, tag):
            return self.filter_children(handler, child_tag)
        return None

    def _modified_type(self, handler, tag):
        """Return the type wrapped by a modifier DIE with tag ``tag``.

        A modifier without ``DW_AT_type`` yields ``VOID``; a handler with a
        different tag yields ``None``.
        """
        if not self._has_tag(handler, tag):
            return None
        subty = self.get_attribute(handler, 'DW_AT_type')
        return VOID if subty is None else subty

    # ------------------------------------------------------------------
    # Root / compile units
    # ------------------------------------------------------------------

    def root_get_units(self):
        """Return all compile units as a list."""
        return list(self.dwarf.iter_CUs())

    def unit_get_filename(self, handler: CompileUnit):
        """Return ``DW_AT_name`` of the unit's top DIE."""
        return self.get_attribute(handler.get_top_DIE(), 'DW_AT_name')

    def unit_get_language(self, handler: CompileUnit):
        """Return ``DW_AT_language`` of the unit's top DIE."""
        return self.get_attribute(handler.get_top_DIE(), 'DW_AT_language')

    def unit_get_variables(self, handler: CompileUnit):
        """Iterate over the variable DIEs directly under the unit's top DIE."""
        return self.filter_children(handler.get_top_DIE(), 'DW_TAG_variable')

    def unit_get_functions(self, handler: CompileUnit):
        """Iterate over the subprogram DIEs directly under the unit's top DIE."""
        return self.filter_children(handler.get_top_DIE(), 'DW_TAG_subprogram')

    def unit_get_ranges(self, handler):
        """Return the address ranges of the unit's top DIE."""
        return self.get_ranges(handler.get_top_DIE())

    def unit_get_comp_dir(self, handler: CompileUnit):
        """Return ``DW_AT_comp_dir`` of the unit's top DIE."""
        return self.get_attribute(handler.get_top_DIE(), 'DW_AT_comp_dir')

    @staticmethod
    def _line_file_name(header, file_index):
        """Return the file name for 1-based ``file_index`` of a line program.

        The include directory is prepended unless the entry's ``dir_index``
        is 0.
        """
        file_entry = header['file_entry'][file_index - 1]
        if file_entry["dir_index"] == 0:
            return file_entry.name.decode()
        return os.path.join(
            header["include_directory"][file_entry["dir_index"] - 1].decode(),
            file_entry.name.decode())

    def unit_get_lines(self, handler: CompileUnit):
        """Return the unit's line-program states with file names resolved.

        Each returned state is a shallow copy whose ``file`` attribute holds
        the file name instead of the numeric file index. The library's own
        state objects are left untouched: overwriting them in place would
        make a later call over the same (possibly cached) entries fail when
        it tries to use the already-replaced name as an index.

        :return: list of states, or ``None`` if the unit has no line program
            or the program has no states.
        """
        import copy  # only this method needs it

        lineprog = self.dwarf.line_program_for_CU(handler)
        if lineprog is None:
            return None

        header = lineprog.header
        file_cache = {}
        states = []
        for entry in lineprog.get_entries():
            if entry.state is None:
                continue
            state = copy.copy(entry.state)
            file_index = state.file
            if file_index not in file_cache:
                file_cache[file_index] = self._line_file_name(
                    header, file_index)
            state.file = file_cache[file_index]
            states.append(state)

        return states or None

    def unit_get_producer(self, handler: CompileUnit):
        """Return ``DW_AT_producer``, falling back to the base class value."""
        result = self.get_attribute(handler.get_top_DIE(), 'DW_AT_producer')
        if result is None:
            result = super().unit_get_producer(handler)
        return result

    # ------------------------------------------------------------------
    # Functions, lexical blocks, parameters, variables
    # ------------------------------------------------------------------

    def function_get_ranges(self, handler):
        """Return the address ranges of the function DIE."""
        return self.get_ranges(handler)

    def function_get_name(self, handler: DIE):
        """Return the function's ``DW_AT_name``."""
        return self.get_attribute(handler, 'DW_AT_name')

    def function_get_return_type(self, handler: DIE):
        """Return the DIE of the function's return type, or ``None``."""
        return self.get_attribute(handler, 'DW_AT_type')

    def function_get_noreturn(self, handler: DIE):
        """Return the boolean value of the ``DW_AT_noreturn`` flag.

        ``False`` when the attribute is absent. The attribute's value is
        returned rather than the attribute object itself, which would be
        truthy regardless of the flag it carries.
        """
        return bool(self.get_attribute(handler, 'DW_AT_noreturn'))

    def function_get_inline(self, handler: DIE):
        """Return the function's ``DW_AT_inline`` value, or ``None``."""
        return self.get_attribute(handler, 'DW_AT_inline')

    def function_get_abstract_origin(self, handler: DIE):
        """Return the function's abstract-origin DIE, or ``None``."""
        return self.get_abstract_origin(handler)

    def function_get_parameters(self, handler: DIE):
        """Iterate over the function's formal-parameter DIEs."""
        return self.filter_children(handler, 'DW_TAG_formal_parameter')

    def function_get_variables(self, handler):
        """Iterate over the variable DIEs directly under the function."""
        return self.filter_children(handler, 'DW_TAG_variable')

    def function_get_lexicalblocks(self, handler):
        """Iterate over the lexical-block DIEs directly under the function."""
        return self.filter_children(handler, 'DW_TAG_lexical_block')

    def lexicalblock_get_ranges(self, handler):
        """Return the address ranges of the lexical block."""
        return self.get_ranges(handler)

    def lexicalblock_get_variables(self, handler):
        """Iterate over the variable DIEs directly under the block."""
        return self.filter_children(handler, 'DW_TAG_variable')

    def lexicalblock_get_lexicalblocks(self, handler):
        """Iterate over the lexical blocks nested directly in the block."""
        return self.filter_children(handler, 'DW_TAG_lexical_block')

    def parameter_get_name(self, handler):
        """Return the parameter's ``DW_AT_name``."""
        return self.get_attribute(handler, 'DW_AT_name')

    def parameter_get_type(self, handler):
        """Return the DIE of the parameter's type, or ``None``."""
        return self.get_attribute(handler, 'DW_AT_type')

    def variable_get_location(self, handler):
        """Return the variable's decoded ``DW_AT_location``, or ``None``."""
        return self.get_attribute(handler, 'DW_AT_location')

    def variable_get_name(self, handler):
        """Return the variable's ``DW_AT_name``."""
        return self.get_attribute(handler, 'DW_AT_name')

    def variable_get_type(self, handler):
        """Return the DIE of the variable's type, or ``None``."""
        return self.get_attribute(handler, 'DW_AT_type')

    # ------------------------------------------------------------------
    # Types
    # ------------------------------------------------------------------

    def type_ptr_of(self, handler: DIE):
        """Return the pointee type of a pointer type (``VOID`` if untyped)."""
        return self._modified_type(handler, 'DW_TAG_pointer_type')

    def type_const_of(self, handler):
        """Return the type qualified by a const type (``VOID`` if untyped)."""
        return self._modified_type(handler, 'DW_TAG_const_type')

    def type_volatile_of(self, handler):
        """Return the type qualified by a volatile type (``VOID`` if untyped)."""
        return self._modified_type(handler, 'DW_TAG_volatile_type')

    def type_array_of(self, handler):
        """Return the element type of an array type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_array_type', 'DW_AT_type')

    def type_array_size(self, handler):
        """Return ``DW_AT_count`` of the array's first subrange child.

        ``None`` if the handler is not an array type, has no subrange child,
        or the subrange carries no ``DW_AT_count``.
        """
        if not self._has_tag(handler, 'DW_TAG_array_type'):
            return None
        first_subrange = next(
            self.filter_children(handler, 'DW_TAG_subrange_type'), None)
        if first_subrange is None:
            return None
        return self.get_attribute(first_subrange, 'DW_AT_count')

    def type_struct_name(self, handler):
        """Return the name of a structure type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_structure_type', 'DW_AT_name')

    def type_struct_size(self, handler):
        """Return the byte size of a structure type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_structure_type', 'DW_AT_byte_size')

    def type_struct_members(self, handler):
        """Iterate over the member DIEs of a structure type."""
        return self._children_if_tag(
            handler, 'DW_TAG_structure_type', 'DW_TAG_member')

    def type_struct_member_name(self, handler):
        """Return the member's ``DW_AT_name``."""
        return self.get_attribute(handler, 'DW_AT_name')

    def type_struct_member_type(self, handler):
        """Return the DIE of the member's type, or ``None``."""
        return self.get_attribute(handler, 'DW_AT_type')

    def type_struct_member_offset(self, handler):
        """Return the member's decoded ``DW_AT_data_member_location``."""
        return self.get_attribute(handler, 'DW_AT_data_member_location')

    def type_union_name(self, handler):
        """Return the name of a union type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_union_type', 'DW_AT_name')

    def type_union_size(self, handler):
        """Return the byte size of a union type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_union_type', 'DW_AT_byte_size')

    def type_union_members(self, handler):
        """Iterate over the member DIEs of a union type."""
        return self._children_if_tag(
            handler, 'DW_TAG_union_type', 'DW_TAG_member')

    def type_union_member_name(self, handler):
        """Return the member's ``DW_AT_name``."""
        return self.get_attribute(handler, 'DW_AT_name')

    def type_union_member_type(self, handler):
        """Return the DIE of the member's type, or ``None``."""
        return self.get_attribute(handler, 'DW_AT_type')

    def type_union_member_offset(self, handler):
        """Return the member's decoded ``DW_AT_data_member_location``."""
        return self.get_attribute(handler, 'DW_AT_data_member_location')

    def type_enum_name(self, handler):
        """Return the name of an enumeration type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_enumeration_type', 'DW_AT_name')

    def type_enum_type(self, handler):
        """Return the underlying type DIE of an enumeration type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_enumeration_type', 'DW_AT_type')

    def type_enum_size(self, handler):
        """Return the byte size of an enumeration type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_enumeration_type', 'DW_AT_byte_size')

    def type_enum_members(self, handler):
        """Iterate over the enumerator DIEs under ``handler`` (no tag check)."""
        return self.filter_children(handler, "DW_TAG_enumerator")

    def type_enum_member_name(self, handler):
        """Return the enumerator's ``DW_AT_name``."""
        return self.get_attribute(handler, "DW_AT_name")

    def type_enum_member_value(self, handler):
        """Return the enumerator's ``DW_AT_const_value``."""
        return self.get_attribute(handler, "DW_AT_const_value")

    def type_func_args(self, handler):
        """Iterate over the parameter DIEs of a subroutine type."""
        return self._children_if_tag(
            handler, "DW_TAG_subroutine_type", "DW_TAG_formal_parameter")

    def type_func_arg_type(self, handler):
        """Return the DIE of the argument's type, or ``None``."""
        return self.get_attribute(handler, "DW_AT_type")

    def type_typedef_name(self, handler):
        """Return the name of a typedef."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_typedef', 'DW_AT_name')

    def type_typedef_of(self, handler):
        """Return the DIE of the type a typedef aliases."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_typedef', 'DW_AT_type')

    def type_basic_name(self, handler):
        """Return the name of a base type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_base_type', 'DW_AT_name')

    def type_basic_encoding(self, handler):
        """Return the ``DW_AT_encoding`` of a base type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_base_type', 'DW_AT_encoding')

    def type_basic_size(self, handler):
        """Return the byte size of a base type."""
        return self._attribute_if_tag(
            handler, 'DW_TAG_base_type', 'DW_AT_byte_size')

    def type_is_void(self, handler):
        """Tell whether ``handler`` is the ``VOID`` sentinel."""
        return handler is VOID