Esempio n. 1
0
 def __init__(self, elffile, fileoffset, fstart, container):
     """Store the LSDA location for one function and parse it right away.

     `elffile` and `fileoffset` are kept as-is (`self.elf`,
     `self.lsda_offset`); `fstart` is used as the key into
     `container.functions` to fetch that function's `sz`.
     """
     # Caller-supplied context.
     self.elf = elffile
     self.lsda_offset = fileoffset
     self.fstart = fstart
     self.sz = container.functions[fstart].sz

     # Field constructors are hard-wired to little-endian, 64-bit DWARF
     # format and 8-byte addresses.
     self.entry_structs = DWARFStructs(little_endian=True,
                                       dwarf_format=64,
                                       address_size=8)
     self._formats = self._eh_encoding_to_field(self.entry_structs)

     # Result containers, empty until parsing runs
     # (presumably filled by _parse_lsda() - defined outside this view).
     self.entries = []
     self.actions = []
     self.ttentries = OrderedDict()
     self.typetable_offset_present = False

     self._parse_lsda()
Esempio n. 2
0
    def __init__(self, stream, bits, little_endian=True):
        """
        Attach the parser to `stream` and prepare the field constructors.

        `bits` must be 32 or 64; it is used directly as the DWARF format and,
        divided by 8, as the address size. Any other value raises ValueError.
        """
        if bits not in (32, 64):
            raise ValueError(
                "Unsupported bits value %d. Expect either 32 or 64." % bits)

        self.stream = stream
        # No LSDA has been selected yet.
        self.address = None
        self.base_offset = None

        self.entry_structs = DWARFStructs(little_endian=little_endian,
                                          dwarf_format=bits,
                                          address_size=bits // 8)
        self._formats = self._eh_encoding_to_field(self.entry_structs)
Esempio n. 3
0
    def test_lineprog_header(self):
        """Parse a hand-assembled line program header and check its fields."""
        structs = DWARFStructs(little_endian=True,
                               dwarf_format=32,
                               address_size=4)

        # Adjacent bytes literals are joined at compile time, so no '+' is
        # needed between the pieces.
        raw_header = (
            b'\x04\x10\x00\x00'  # initial length
            b'\x05\x02'  # version
            b'\x20\x00\x00\x00'  # header length
            b'\x05\x10\x40\x50'  # until and including line_range
            b'\x06'  # opcode_base
            b'\x00\x01\x04\x08\x0C'  # standard_opcode_lengths
            # 2 dir names followed by a NULL
            b'\x61\x62\x00\x70\x00\x00'
            # a file entry
            b'\x61\x72\x00\x0C\x0D\x0F'
            # and another entry
            b'\x45\x50\x51\x00\x86\x12\x07\x08'
            # followed by NULL
            b'\x00'
        )
        header = structs.Dwarf_lineprog_header.parse(raw_header)

        self.assertEqual(header.version, 0x205)
        self.assertEqual(header.opcode_base, 6)
        self.assertEqual(header.standard_opcode_lengths, [0, 1, 4, 8, 12])
        self.assertEqual(header.include_directory, [b'ab', b'p'])

        files = header.file_entry
        self.assertEqual(len(files), 2)
        self.assertEqual(files[0].name, b'ar')
        self.assertEqual(files[1].name, b'EPQ')
        # dir_index bytes are 0x86 0x12: low seven bits 6, next group 0x12.
        self.assertEqual(files[1].dir_index, 0x12 * 128 + 6)
Esempio n. 4
0
class TestParseExpr(unittest.TestCase):
    """Checks DWARFExprParser output for single-operation expressions."""
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        set_global_machine_arch('x64')

    def test_single(self):
        parser = DWARFExprParser(self.structs32)

        self.assertEqual(
            parser.parse_expr([0x1b]),
            [DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])])

        self.assertEqual(
            parser.parse_expr([0x90, 16]),
            [DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])])

        parsed = parser.parse_expr([0xe0])
        self.assertEqual(len(parsed), 1)
        # Opcode 0xe0 is shared by DW_OP_GNU_push_tls_address and
        # DW_OP_lo_user, so either name is accepted to keep the test
        # deterministic.
        acceptable = [
            DWARFExprOp(op=0xe0, op_name=name, args=[])
            for name in ('DW_OP_GNU_push_tls_address', 'DW_OP_lo_user')
        ]
        self.assertIn(parsed[0], acceptable)
Esempio n. 5
0
class TestExprDumper(unittest.TestCase):
    """Checks the text ExprDumper produces via process_expr()/get_str()."""
    structs32 = DWARFStructs(little_endian=True,
                             dwarf_format=32,
                             address_size=4)

    def setUp(self):
        self.visitor = ExprDumper(self.structs32)
        set_global_machine_arch('x64')

    def _assert_dumps(self, cases):
        """Process each (expression, expected text) pair in order.

        The first pair uses the visitor created by the framework's setUp();
        every later pair calls setUp() again first so it gets a new visitor.
        """
        for position, (expr, expected) in enumerate(cases):
            if position:
                self.setUp()
            self.visitor.process_expr(expr)
            self.assertEqual(self.visitor.get_str(), expected)

    def test_basic_single(self):
        self._assert_dumps([
            ([0x1b], 'DW_OP_div'),
            ([0x74, 0x82, 0x01], 'DW_OP_breg4 (rsi): 130'),
            ([0x91, 0x82, 0x01], 'DW_OP_fbreg: 130'),
            ([0x51], 'DW_OP_reg1 (rdx)'),
            ([0x90, 16], 'DW_OP_regx: 16 (rip)'),
            ([0x9d, 0x8f, 0x0A, 0x90, 0x01], 'DW_OP_bit_piece: 1295 144'),
        ])

    def test_basic_sequence(self):
        self._assert_dumps([
            ([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06],
             'DW_OP_addr: 201; DW_OP_deref; DW_OP_deref'),
            ([0x15, 0xFF, 0x0b, 0xf1, 0xff],
             'DW_OP_pick: 255; DW_OP_const2s: -15'),
            ([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e],
             'DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul'),
        ])
def process_compile_unit(dwarf_info, pyelftools_elf_file, compile_unit):
    """Collect the types of one compile unit, then process each of its DIEs.

    `dwarf_info` is accepted but not used in this function.
    """
    # DWARFStructs is needed to parse the DW_TAG_variable DW_AT_location.
    # It is rebuilt for every compile unit (the original author reports
    # errors otherwise).
    endianness = pyelftools_elf_file.little_endian
    cu_format = compile_unit.dwarf_format()
    cu_address_size = compile_unit['address_size']
    structs = DWARFStructs(little_endian=endianness,
                           dwarf_format=cu_format,
                           address_size=cu_address_size)

    get_compile_unit_types(compile_unit)

    # The CU exposes an iterator over all the DIEs it contains.
    for die in compile_unit.iter_DIEs():
        process_die(die, structs, compile_unit)
Esempio n. 7
0
class TestExprDumper(unittest.TestCase):
    """Checks the text returned by ExprDumper.dump_expr()."""
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        self.visitor = ExprDumper(self.structs32)
        set_global_machine_arch('x64')

    def test_basic_single(self):
        single_op_cases = (
            ([0x1b], 'DW_OP_div'),
            ([0x74, 0x82, 0x01], 'DW_OP_breg4 (rsi): 130'),
            ([0x91, 0x82, 0x01], 'DW_OP_fbreg: 130'),
            ([0x51], 'DW_OP_reg1 (rdx)'),
            ([0x90, 16], 'DW_OP_regx: 16 (rip)'),
            ([0x9d, 0x8f, 0x0A, 0x90, 0x01], 'DW_OP_bit_piece: 1295 144'),
            ([0x0e, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00],
             'DW_OP_const8u: 71777214294589695'),
        )
        for expr, expected in single_op_cases:
            self.assertEqual(self.visitor.dump_expr(expr), expected)

    def test_basic_sequence(self):
        sequence_cases = (
            ([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06],
             'DW_OP_addr: 201; DW_OP_deref; DW_OP_deref'),
            ([0x15, 0xFF, 0x0b, 0xf1, 0xff],
             'DW_OP_pick: 255; DW_OP_const2s: -15'),
            ([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e],
             'DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul'),
        )
        for expr, expected in sequence_cases:
            self.assertEqual(self.visitor.dump_expr(expr), expected)

        # Opcode 0xe0 is shared by DW_OP_GNU_push_tls_address and
        # DW_OP_lo_user, so both renderings are accepted to avoid
        # non-determinism.
        dumped = self.visitor.dump_expr([0x08, 0x0f, 0xe0])
        self.assertIn(dumped,
                      ('DW_OP_const1u: 15; DW_OP_GNU_push_tls_address',
                       'DW_OP_const1u: 15; DW_OP_lo_user'))
Esempio n. 8
0
    def test_ehframe_fde_with_lsda_pointer(self):
        """Parse an .eh_frame CIE/FDE pair whose FDE carries an LSDA pointer."""
        # CIE and FDE dumped from exceptions_0, offset 0xcc0
        # binary is at https://github.com/angr/binaries/blob/master/tests/x86_64/exceptions_0
        cie_bytes = (
            b'\x1c\x00\x00\x00'  # length
            b'\x00\x00\x00\x00'  # ID
            b'\x01'  # version
            b'\x7a\x50\x4c\x52\x00'  # augmentation string
            b'\x01'  # code alignment
            b'\x78'  # data alignment
            b'\x10'  # return address register
            b'\x07'  # augmentation data length
            b'\x9b'  # personality function pointer encoding
            b'\x3d\x13\x20\x00'  # personality function pointer
            b'\x1b'  # LSDA pointer encoding
            b'\x1b'  # FDE encoding
            b'\x0c\x07\x08\x90'  # initial instructions
            b'\x01\x00\x00'
        )
        fde_bytes = (
            b'\x24\x00\x00\x00'  # length
            b'\x24\x00\x00\x00'  # CIE reference pointer
            b'\x62\xfd\xff\xff'  # pc begin
            b'\x89\x00\x00\x00'  # pc range
            b'\x04'  # augmentation data length
            b'\xb7\x00\x00\x00'  # LSDA pointer
            b'\x41\x0e\x10\x86'  # initial instructions
            b'\x02\x43\x0d\x06'
            b'\x45\x83\x03\x02'
            b'\x7f\x0c\x07\x08'
            b'\x00\x00\x00'
        )
        data = cie_bytes + fde_bytes
        stream = BytesIO(data)

        structs = DWARFStructs(
            little_endian=True,
            dwarf_format=32,
            address_size=8)
        cfi = CallFrameInfo(stream, len(data), 0, structs, for_eh_frame=True)
        entries = cfi.get_entries()

        self.assertEqual(len(entries), 2)
        cie, fde = entries

        self.assertIsInstance(cie, CIE)
        self.assertIn('LSDA_encoding', cie.augmentation_dict)
        # The encoding byte packs the value format in the low nibble and the
        # application modifier in the high nibble.
        encoding = cie.augmentation_dict['LSDA_encoding']
        self.assertEqual(encoding & 0x0f,
                         DW_EH_encoding_flags['DW_EH_PE_sdata4'])
        self.assertEqual(encoding & 0xf0,
                         DW_EH_encoding_flags['DW_EH_PE_pcrel'])

        self.assertIsInstance(fde, FDE)
        self.assertEqual(fde.lsda_pointer, 232)
Esempio n. 9
0
    def test_describe_CFI_instructions(self):
        """Describe the instructions of a single hand-assembled CIE."""
        # The data here represents a single CIE
        data = (
            b'\x16\x00\x00\x00'  # length
            b'\xff\xff\xff\xff'  # CIE_id
            b'\x03\x00\x04\x7c'  # version, augmentation, caf, daf
            b'\x08'  # return address
            b'\x0c\x07\x02'
            b'\x10\x02\x07\x03\x01\x02\x00\x00\x06\x06'
        )
        stream = BytesIO(data)

        structs = DWARFStructs(
            little_endian=True,
            dwarf_format=32,
            address_size=4)
        entries = CallFrameInfo(stream, len(data), structs).get_entries()

        # The architecture is switched to x86 before describing, as the
        # expected text uses x86 register names.
        set_global_machine_arch('x86')
        expected = (
            '  DW_CFA_def_cfa: r7 (edi) ofs 2\n'
            '  DW_CFA_expression: r2 (edx) (DW_OP_addr: 201; DW_OP_deref; DW_OP_deref)\n'
        )
        self.assertEqual(describe_CFI_instructions(entries[0]), expected)
Esempio n. 10
0
    def _make_program_in_stream(self, stream):
        """ Build a LineProgram over the program encoded in `stream`, using a
            fixed hand-assembled header. The program spans the stream from
            offset 0 to the length of its contents.
        """
        structs = DWARFStructs(
            little_endian=True,
            dwarf_format=32,
            address_size=4)

        # Adjacent bytes literals are joined at compile time.
        raw_header = (
            b'\x04\x10\x00\x00'  # initial length
            b'\x03\x00'  # version
            b'\x20\x00\x00\x00'  # header length
            b'\x01\x01\x01\x0F'  # flags
            b'\x0A'  # opcode_base
            b'\x00\x01\x04\x08\x0C\x01\x01\x01\x00'  # standard_opcode_lengths
            # 2 dir names followed by a NULL
            b'\x61\x62\x00\x70\x00\x00'
            # a file entry
            b'\x61\x72\x00\x0C\x0D\x0F'
            # and another entry
            b'\x45\x50\x51\x00\x86\x12\x07\x08'
            # followed by NULL
            b'\x00'
        )
        header = structs.Dwarf_lineprog_header.parse(raw_header)

        program_end = len(stream.getvalue())
        return LineProgram(header, stream, structs, 0, program_end)
Esempio n. 11
0
class TestParseExpr(unittest.TestCase):
    """Checks DWARFExprParser output for single-operation expressions."""
    structs32 = DWARFStructs(little_endian=True,
                             dwarf_format=32,
                             address_size=4)

    def setUp(self):
        set_global_machine_arch('x64')

    def test_single(self):
        p = DWARFExprParser(self.structs32)
        lst = p.parse_expr([0x1b])
        self.assertEqual(lst,
                         [DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])])

        lst = p.parse_expr([0x90, 16])
        self.assertEqual(
            lst, [DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])])

        lst = p.parse_expr([0xe0])
        self.assertEqual(len(lst), 1)
        # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so
        # accept either name; asserting a single one makes the test depend on
        # which name the opcode table happens to yield.
        self.assertIn(lst[0], [
            DWARFExprOp(op=0xe0, op_name='DW_OP_GNU_push_tls_address',
                        args=[]),
            DWARFExprOp(op=0xe0, op_name='DW_OP_lo_user', args=[])
        ])
Esempio n. 12
0
    def __init__(self, elffile):
        """Load the ".debug" (and, if present, ".line") section of `elffile`
        into in-memory streams and build the DWARF configuration objects.
        """
        debug_bytes = elffile.get_section_by_name(".debug").data()
        self.section_size = len(debug_bytes)
        # Stream over the section contents, positioned at offset 0.
        self.stm = BytesIO(debug_bytes)

        # NOTE(review): self.linestream is only assigned when a ".line"
        # section exists; confirm readers cope with the attribute missing.
        line_section = elffile.get_section_by_name(".line")
        if line_section:
            self.linestream = BytesIO(line_section.data())

        self.config = DwarfConfig(
            little_endian=elffile.little_endian,
            default_address_size=elffile.elfclass // 8,
            machine_arch=elffile.get_machine_arch())

        self.structs = DWARFStructs(
            little_endian=self.config.little_endian,
            dwarf_format=32,
            address_size=self.config.default_address_size)
Esempio n. 13
0
    def test_spec_sample_d6(self):
        """Parse and decode the CIE/FDE pair of the D.6 sample in DWARFv3.

        Checks the raw entry fields, a selection of the parsed instructions,
        and the rows of the decoded call-frame tables.
        """
        # D.6 sample in DWARFv3
        data = (
            b'' +
            # first comes the CIE
            b'\x20\x00\x00\x00' +  # length
            b'\xff\xff\xff\xff' +  # CIE_id
            b'\x03\x00\x04\x7c' +  # version, augmentation, caf, daf
            b'\x08' +  # return address
            b'\x0c\x07\x00' + b'\x08\x00' + b'\x07\x01' + b'\x07\x02' +
            b'\x07\x03' + b'\x08\x04' + b'\x08\x05' + b'\x08\x06' +
            b'\x08\x07' + b'\x09\x08\x01' + b'\x00' +

            # then comes the FDE
            b'\x28\x00\x00\x00' +  # length
            b'\x00\x00\x00\x00' +  # CIE_pointer (to CIE at 0)
            b'\x44\x33\x22\x11' +  # initial_location
            b'\x54\x00\x00\x00' +  # address range
            b'\x41' + b'\x0e\x0c' + b'\x41' + b'\x88\x01' + b'\x41' +
            b'\x86\x02' + b'\x41' + b'\x0d\x06' + b'\x41' + b'\x84\x03' +
            b'\x4b' + b'\xc4' + b'\x41' + b'\xc6' + b'\x0d\x07' + b'\x41' +
            b'\xc8' + b'\x41' + b'\x0e\x00' + b'\x00\x00')
        s = BytesIO(data)

        structs = DWARFStructs(little_endian=True,
                               dwarf_format=32,
                               address_size=4)
        cfi = CallFrameInfo(s, len(data), structs)
        entries = cfi.get_entries()

        self.assertEqual(len(entries), 2)
        self.assertIsInstance(entries[0], CIE)
        self.assertEqual(entries[0]['length'], 32)
        self.assertEqual(entries[0]['data_alignment_factor'], -4)
        self.assertEqual(entries[0]['return_address_register'], 8)
        self.assertEqual(len(entries[0].instructions), 11)
        self.assertInstruction(entries[0].instructions[0], 'DW_CFA_def_cfa',
                               [7, 0])
        self.assertInstruction(entries[0].instructions[8], 'DW_CFA_same_value',
                               [7])
        self.assertInstruction(entries[0].instructions[9], 'DW_CFA_register',
                               [8, 1])

        # assertIsInstance (as used for the CIE above) reports the actual
        # type on failure, unlike assertTrue(isinstance(...)).
        self.assertIsInstance(entries[1], FDE)
        self.assertEqual(entries[1]['length'], 40)
        self.assertEqual(entries[1]['CIE_pointer'], 0)
        self.assertEqual(entries[1]['address_range'], 84)
        self.assertIs(entries[1].cie, entries[0])
        self.assertEqual(len(entries[1].instructions), 21)
        self.assertInstruction(entries[1].instructions[0],
                               'DW_CFA_advance_loc', [1])
        self.assertInstruction(entries[1].instructions[1],
                               'DW_CFA_def_cfa_offset', [12])
        self.assertInstruction(entries[1].instructions[9], 'DW_CFA_offset',
                               [4, 3])
        self.assertInstruction(entries[1].instructions[18],
                               'DW_CFA_def_cfa_offset', [0])
        self.assertInstruction(entries[1].instructions[20], 'DW_CFA_nop', [])

        # Now let's decode it...
        decoded_CIE = entries[0].get_decoded()
        self.assertEqual(decoded_CIE.reg_order, list(range(9)))
        self.assertEqual(len(decoded_CIE.table), 1)
        self.assertEqual(decoded_CIE.table[0]['cfa'].reg, 7)
        self.assertEqual(decoded_CIE.table[0]['pc'], 0)
        self.assertEqual(decoded_CIE.table[0]['cfa'].offset, 0)
        self.assertEqual(decoded_CIE.table[0][4].type, RegisterRule.SAME_VALUE)
        self.assertEqual(decoded_CIE.table[0][8].type, RegisterRule.REGISTER)
        self.assertEqual(decoded_CIE.table[0][8].arg, 1)

        # The FDE's initial_location bytes above are 44 33 22 11
        # (little-endian), i.e. 0x11223344; later rows are offsets from it.
        decoded_FDE = entries[1].get_decoded()
        self.assertEqual(decoded_FDE.reg_order, list(range(9)))
        self.assertEqual(decoded_FDE.table[0]['cfa'].reg, 7)
        self.assertEqual(decoded_FDE.table[0]['cfa'].offset, 0)
        self.assertEqual(decoded_FDE.table[0]['pc'], 0x11223344)
        self.assertEqual(decoded_FDE.table[0][8].type, RegisterRule.REGISTER)
        self.assertEqual(decoded_FDE.table[0][8].arg, 1)
        self.assertEqual(decoded_FDE.table[1]['cfa'].reg, 7)
        self.assertEqual(decoded_FDE.table[1]['cfa'].offset, 12)
        self.assertEqual(decoded_FDE.table[2][8].type, RegisterRule.OFFSET)
        self.assertEqual(decoded_FDE.table[2][8].arg, -4)
        self.assertEqual(decoded_FDE.table[2][4].type, RegisterRule.SAME_VALUE)
        self.assertEqual(decoded_FDE.table[5]['pc'], 0x11223344 + 20)
        self.assertEqual(decoded_FDE.table[5][4].type, RegisterRule.OFFSET)
        self.assertEqual(decoded_FDE.table[5][4].arg, -12)
        self.assertEqual(decoded_FDE.table[6]['pc'], 0x11223344 + 64)
        self.assertEqual(decoded_FDE.table[9]['pc'], 0x11223344 + 76)
Esempio n. 14
0
class LSDAExceptionTable:
    """
    Parser for LSDA exception tables read from a binary stream.

    TODO: Much of this class should be eventually moved to pyelftools.
    """
    def __init__(self, stream, bits, little_endian=True):
        """
        Attach the parser to `stream` and prepare the field constructors.

        `bits` must be 32 or 64; it is used directly as the DWARF format and,
        divided by 8, as the address size. Any other value raises ValueError.
        """
        if bits not in (32, 64):
            raise ValueError(
                "Unsupported bits value %d. Expect either 32 or 64." % bits)

        self.stream = stream
        # Both are assigned by parse_lsda().
        self.address = None
        self.base_offset = None

        self.entry_structs = DWARFStructs(little_endian=little_endian,
                                          dwarf_format=bits,
                                          address_size=bits // 8)
        self._formats = self._eh_encoding_to_field(self.entry_structs)

    @staticmethod
    def _eh_encoding_to_field(entry_structs):
        """
        Copied from pyelftools, where the equivalent is a bound method.

        Build a dict keyed by the basic encodings (DW_EH_encoding_flags
        values) whose values are the matching field constructors of
        `entry_structs` (for instance entry_structs.Dwarf_uint32).
        """
        constructors = (
            ('DW_EH_PE_absptr', entry_structs.Dwarf_target_addr),
            ('DW_EH_PE_uleb128', entry_structs.Dwarf_uleb128),
            ('DW_EH_PE_udata2', entry_structs.Dwarf_uint16),
            ('DW_EH_PE_udata4', entry_structs.Dwarf_uint32),
            ('DW_EH_PE_udata8', entry_structs.Dwarf_uint64),
            ('DW_EH_PE_sleb128', entry_structs.Dwarf_sleb128),
            ('DW_EH_PE_sdata2', entry_structs.Dwarf_int16),
            ('DW_EH_PE_sdata4', entry_structs.Dwarf_int32),
            ('DW_EH_PE_sdata8', entry_structs.Dwarf_int64),
        )
        return {
            DW_EH_encoding_flags[flag_name]: constructor
            for flag_name, constructor in constructors
        }

    def _read_field(self, constructor, name):
        """Parse one field built by `constructor(name)` at the current
        stream position and return its value."""
        parsed = struct_parse(Struct('dummy', constructor(name)), self.stream)
        return parsed[name]

    def parse_lsda(self, address, offset):
        """
        Parse the LSDA that starts at `offset` in the stream.

        `address` and `offset` are remembered on the instance; the header is
        read first, then call site entries until the call site table length
        given by the header has been consumed. Returns the list of entries.
        """
        self.address = address
        self.base_offset = offset
        self.stream.seek(offset)
        header = self._parse_lsda_header()

        call_sites = []  # type: List[CallSiteEntry]
        table_end = self.stream.tell() + header.call_site_table_len
        while self.stream.tell() < table_end:
            entry = self._parse_call_site_entry(header.call_site_encoding)
            if entry is None:
                continue
            call_sites.append(entry)

        return call_sites

    def _parse_lsda_header(self):
        """
        Read the LSDA header at the current stream position.

        Returns an ExceptionTableHeader built from: LPStart (None when
        omitted), the TType encoding byte, the TType offset (None when
        omitted), the call site encoding byte and the call site table length.
        Raises NotImplementedError for an LPStart modifier other than
        none/pcrel.
        """
        omit = DW_EH_encoding_flags['DW_EH_PE_omit']

        # lpstart
        lpstart_encoding = self.stream.read(1)[0]
        if lpstart_encoding == omit:
            lpstart = None
        else:
            # Low nibble selects the value format, high nibble the modifier.
            value_format = self._formats[lpstart_encoding & 0x0f]
            modifier = lpstart_encoding & 0xf0
            lpstart = self._read_field(value_format, 'LPStart')

            if modifier != 0:
                if modifier != DW_EH_encoding_flags['DW_EH_PE_pcrel']:
                    raise NotImplementedError("Unsupported modifier %#x." %
                                              modifier)
                # NOTE(review): the stream position used here is the one
                # *after* the value was read - confirm that is the intended
                # base for the PC-relative adjustment.
                lpstart += self.address + (self.stream.tell() -
                                           self.base_offset)

        # ttype
        ttype_encoding = self.stream.read(1)[0]
        if ttype_encoding == omit:
            ttype_offset = None
        else:
            ttype_offset = self._read_field(self.entry_structs.Dwarf_uleb128,
                                            'TType')

        # call site table length
        cstable_encoding = self.stream.read(1)[0]
        cstable_length = self._read_field(self.entry_structs.Dwarf_uleb128,
                                          'CSTable')

        return ExceptionTableHeader(
            lpstart,
            ttype_encoding,
            ttype_offset,
            cstable_encoding,
            cstable_length,
        )

    def _parse_call_site_entry(self, encoding):
        """
        Read one call site record at the current stream position.

        Start, length and landing pad use the value format selected by the
        low nibble of `encoding`; the action is a ULEB128. The record is
        consumed before the modifier (high nibble) is checked, and any
        non-zero modifier raises NotImplementedError.
        """
        field = self._formats[encoding & 0x0f]
        modifier = encoding & 0xf0

        record = struct_parse(
            Struct(
                'CallSiteEntry',
                field('cs_start'),
                field('cs_len'),
                field('cs_lp'),
                self.entry_structs.Dwarf_uleb128('cs_action'),
            ), self.stream)

        if modifier != 0:
            raise NotImplementedError(
                "Unsupported modifier for CallSiteEntry: %#x." % modifier)

        return CallSiteEntry(record['cs_start'], record['cs_len'],
                             record['cs_lp'], record['cs_action'])
class TestParseExpr(unittest.TestCase):
    """Checks DWARFExprParser output, including the offset of each op."""
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        set_global_machine_arch('x64')

    def test_single(self):
        parser = DWARFExprParser(self.structs32)

        self.assertEqual(
            parser.parse_expr([0x1b]),
            [DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[], offset=0)])

        self.assertEqual(
            parser.parse_expr([0x90, 16]),
            [DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16], offset=0)])

        parsed = parser.parse_expr([0xe0])
        self.assertEqual(len(parsed), 1)
        # Opcode 0xe0 is shared by DW_OP_GNU_push_tls_address and
        # DW_OP_lo_user, so either name is accepted to keep the test
        # deterministic.
        acceptable = [
            DWARFExprOp(op=0xe0, op_name=name, args=[], offset=0)
            for name in ('DW_OP_GNU_push_tls_address', 'DW_OP_lo_user')
        ]
        self.assertIn(parsed[0], acceptable)

        # Real life example:
        # push_object_address
        # deref
        # dup
        # bra 4
        # lit0
        # skip 3
        # lit4
        # minus
        # deref
        parsed = parser.parse_expr([
            0x97, 0x6, 0x12, 0x28, 0x4, 0x0, 0x30, 0x2F, 0x3, 0x0, 0x34, 0x1C,
            0x6
        ])
        self.assertEqual(len(parsed), 9)
        # (opcode, name, args, byte offset of the op within the expression)
        expected_rows = (
            (0x97, 'DW_OP_push_object_address', [], 0),
            (0x6, 'DW_OP_deref', [], 1),
            (0x12, 'DW_OP_dup', [], 2),
            (0x28, 'DW_OP_bra', [4], 3),
            (0x30, 'DW_OP_lit0', [], 6),
            (0x2f, 'DW_OP_skip', [3], 7),
            (0x34, 'DW_OP_lit4', [], 10),
            (0x1c, 'DW_OP_minus', [], 11),
            (0x6, 'DW_OP_deref', [], 12),
        )
        self.assertEqual(parsed, [
            DWARFExprOp(op=code, op_name=name, args=args, offset=offset)
            for code, name, args, offset in expected_rows
        ])
Esempio n. 16
0
class LSDATable():
    """
    Parse one function's LSDA (language-specific data area) from an ELF
    stream and re-emit it as GNU assembler text.

    The LSDA Table in GCC-Frontend Compilers (All GCC Languages) implements
    the LSDA using __gxx_personality_v0. Thus this should work for all
    GCC-languages we care about, but that isn't guaranteed.

    Parsing happens eagerly in the constructor; afterwards the ``generate_*``
    methods render the header, call-site table, action table and type table
    as assembly snippets that use labels derived from the function start
    address.
    """
    def __init__(self, elffile, fileoffset, fstart, container):
        """
        Parse the LSDA located at `fileoffset` in `elffile`.

        elffile:    seekable binary stream positioned anywhere; it is seeked
                    to `fileoffset` before reading.
        fileoffset: offset of the LSDA inside `elffile`.
        fstart:     start address of the owning function; used as the key
                    into `container.functions` and in every generated label.
        container:  object whose `functions[fstart].sz` gives the function
                    size in bytes.
        """
        self.elf = elffile
        self.lsda_offset = fileoffset
        # Little-endian, 64-bit DWARF format, 8-byte addresses.
        self.entry_structs = DWARFStructs(True, 64, 8)
        self.entries = []
        self.fstart = fstart
        self.sz = container.functions[fstart].sz
        self._formats = self._eh_encoding_to_field(self.entry_structs)
        self.actions = []
        self.ttentries = OrderedDict()
        self.typetable_offset_present = False
        self._parse_lsda()

    @staticmethod
    def _eh_encoding_to_field(entry_structs):
        """
        Shamelessly copied from pyelftools since the original method is a
        bound method.

        Return a mapping from basic encodings (DW_EH_encoding_flags) to the
        corresponding field constructors (for instance
        entry_structs.Dwarf_uint32).
        """
        constructors = (
            ('DW_EH_PE_absptr', entry_structs.Dwarf_target_addr),
            ('DW_EH_PE_uleb128', entry_structs.Dwarf_uleb128),
            ('DW_EH_PE_udata2', entry_structs.Dwarf_uint16),
            ('DW_EH_PE_udata4', entry_structs.Dwarf_uint32),
            ('DW_EH_PE_udata8', entry_structs.Dwarf_uint64),
            ('DW_EH_PE_sleb128', entry_structs.Dwarf_sleb128),
            ('DW_EH_PE_sdata2', entry_structs.Dwarf_int16),
            ('DW_EH_PE_sdata4', entry_structs.Dwarf_int32),
            ('DW_EH_PE_sdata8', entry_structs.Dwarf_int64),
        )
        return {
            DW_EH_encoding_flags[flag]: constructor
            for flag, constructor in constructors
        }

    def _parse_lsda(self):
        """Parse the LSDA header, then the tables that follow it."""
        self._parse_lsda_header()
        self._parse_lsda_entries()

    def _parse_lsda_header(self):
        """
        Read the LSDA header and populate `self.header` and the label names.

        Leaves the stream positioned at the first call-site entry.

        Raises NotImplementedError when the LPStart encoding is anything
        other than DW_EH_PE_omit.
        """
        # https://www.airs.com/blog/archives/464

        self.elf.seek(self.lsda_offset)

        lpstart_raw = self.elf.read(1)[0]
        if lpstart_raw != DW_EH_encoding_flags['DW_EH_PE_omit']:
            # See https://www.airs.com/blog/archives/460, it should be omit in
            # practice
            raise NotImplementedError("We do not handle this case for now")
        # An explicit landing pad base is never decoded (see above); the
        # value is only reported in the debug output below.
        lpstart = None

        typetable_encoding = self.elf.read(1)[0]
        # TODO: the encoding is the right one + 1, which is weird

        uleb128 = self.entry_structs.Dwarf_uleb128

        self.typetable_offset = 0
        if typetable_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']:
            self.typetable_offset = struct_parse(
                Struct('dummy', uleb128('TType')), self.elf)['TType']
            self.typetable_offset_present = True
        else:
            self.typetable_offset_present = False

        call_site_table_encoding = self.elf.read(1)[0]
        call_site_table_len = struct_parse(
            Struct('dummy', uleb128('CSTable')), self.elf)['CSTable']

        # Without a type table offset, fall back to the call-site table
        # length so that `typetable_offset` always holds a number.
        if not self.typetable_offset_present:
            self.typetable_offset = call_site_table_len

        print("lpstart", lpstart_raw)
        print("lpstart_pcrel", lpstart)
        print("typetable_encoding", typetable_encoding)
        print("call_site_table_encoding", call_site_table_encoding)

        print("CALL SITE TABLE LENGTH %d" % call_site_table_len)
        print("TYPETABLE OFFSET %d" % self.typetable_offset)

        self.end_label = ".LLSDATT%x" % self.fstart
        self.table_label = ".LLSDATTD%x" % self.fstart
        self.action_label = ".LLSDACSE%x" % self.fstart
        self.callsite_label = ".LLSDACSB%x" % self.fstart
        self.ttable_prefix_label = ".LLSDATYP%x" % self.fstart

        # Need to construct some representation here.
        self.header = {
            "lpstart": lpstart_raw,
            "encoding": call_site_table_encoding,
            "typetable_encoding": typetable_encoding,
            "call_site_table_len": call_site_table_len,
            # Don't forget to check typetable_offset_present
            "cs_act_tt_total_len": self.typetable_offset,
        }

    def _parse_lsda_entries(self):
        """
        Read the call-site table, the action records and the type table.

        Expects the stream to be positioned right after the LSDA header.
        Fills `self.entries` (call sites), `self.actions` (action records)
        and `self.ttentries` (type table entries keyed by action filter).
        """
        start_cs_offset = self.elf.tell()
        call_site_table_len = self.header["call_site_table_len"]
        # Only the low nibble (value format) of the encoding is used here.
        base_encoding = self.header["encoding"] & 0x0f
        uleb128 = self.entry_structs.Dwarf_uleb128

        action_count = 0

        while self.elf.tell() - start_cs_offset < call_site_table_len:
            # Looked up inside the loop so that an unsupported encoding only
            # fails when there is actually an entry to decode.
            field = self._formats[base_encoding]

            # Maybe we need to store the offset in the entry ?
            s = struct_parse(
                Struct(
                    'CallSiteEntry',
                    field('cs_start'),
                    field('cs_len'),
                    field('cs_lp'),
                    uleb128('cs_action'),
                ), self.elf)

            cs_action = s['cs_action']
            if cs_action != 0:
                # cs_action is 1 plus a byte offset into the action table;
                # with two bytes per record this yields the number of records
                # needed to reach the referenced one.
                action_offset_bytes = (cs_action + 1) >> 1
                action_count = max(action_count, action_offset_bytes)

            self.entries.append(s)

        processed_bytes = self.elf.tell() - start_cs_offset
        print("+++++ %d bytes read, %d to go, %d actions" %
              (processed_bytes, self.typetable_offset - processed_bytes,
               action_count))

        # NOTE(review): both fields are read as single unsigned bytes; the
        # LSDA format presumably allows wider/signed LEB128 values here --
        # confirm before relying on this for exception specifications.
        action_struct = Struct("ActionEntry",
                               self.entry_structs.Dwarf_uint8('act_filter'),
                               self.entry_structs.Dwarf_uint8('act_next'))

        for _ in range(action_count):
            action = struct_parse(action_struct, self.elf)
            print(">>>> ACTION ", action)
            self.actions.append(action)

        # NOTE(review): the constant 3 presumably covers the two encoding
        # bytes plus a one-byte TType offset field -- confirm for offsets
        # that need a multi-byte ULEB128.
        ttendloc = self.lsda_offset + self.typetable_offset + 3

        print("TT %x" % (ttendloc))
        for action in self.actions:
            # Type table entries are 8 bytes wide and indexed backwards from
            # the end of the table by the action filter.
            type_bytes_offset = (action.act_filter * -8)
            ttentryloc = ttendloc + type_bytes_offset
            print("****** TT LOC: %x" % (ttentryloc))
            self.elf.seek(ttentryloc, io.SEEK_SET)
            ptrbytes = self.elf.read(8)
            ptr = struct.unpack("<Q", ptrbytes)[0]
            print("****** TT PTR: %x" % ptr)

            # A zero entry stays zero; anything else is treated as relative
            # to the location of the entry itself.
            symbolized_target = 0
            if ptr != 0:
                symbolized_target = ptr + ttentryloc

            typeentry = {'address': symbolized_target}
            self.ttentries[action.act_filter] = typeentry
            print("****** TT x: %x" % (symbolized_target))

    def generate_tableoffset(self):
        """
        Return the assembly line for the type table offset field, or a
        comment line when the header carried no such field.
        """
        if self.typetable_offset_present:
            return ".uleb128 %s-%s" % (self.end_label, self.table_label)
        return "# @TType Encoding is DW_EH_PE_omit, ignoring."

    def generate_header(self):
        """Return the assembly text that opens the LSDA for this function."""
        print("generate header", self.fstart)
        print("generate table label", self.table_label)

        ttoffset = self.generate_tableoffset()

        table_header = """
.LFE%x:
    .section\t.gcc_except_table,"a",@progbits
    .align 4
GCC_except_table%x:
.LLSDA%x:
    .byte 0x%x   # @LPStart encoding
    .byte 0x%x   # @TType Encoding
    %s
.LLSDATTD%x:
    .byte 0x1

        """ % (
            self.fstart,
            self.fstart,
            self.fstart,
            self.header["lpstart"],
            self.header["typetable_encoding"],
            ttoffset,
            self.fstart,
        )
        return table_header

    def generate_table(self):
        """
        Return the assembly text for the call-site table, the action table
        and the type table, including their delimiting labels.
        """
        print("generate table", self.fstart)
        table = """
    .uleb128 %s-%s
.LLSDACSB%x:
%s
.LLSDACSE%x:
    %s
    .align 4
    %s
.LLSDATT%x:
    .p2align 2
        """ % (self.action_label, self.callsite_label, self.fstart,
               self.generate_callsites(), self.fstart, self.generate_actions(),
               self.generate_typetable(), self.fstart)
        return table

    def generate_callsites(self):
        """
        Return the call-site table as assembly, one four-line record per
        parsed entry, with records separated by a newline.
        """
        #.LLSDACSB2:
        #   .uleb128 .LEHB4-.LFB2    ; uint8_t start
        #   .uleb128 .LEHE4-.LEHB4   ; uint8_t len
        #   .uleb128 .L19-.LFB2      ; uint8_t lp
        #   .uleb128 0x3             ; uint8_t action

        function_end = self.sz + self.fstart

        def callsite_ftr(entry):
            # cse: The start of the instructions for the current call site,
            #      a byte offset from the landing pad base. This is encoded
            #      using the encoding from the header.
            # cbw: The length of the instructions for the current call site,
            #      in bytes. This is encoded using the encoding from the
            #      header.
            # jlo: A pointer to the landing pad for this sequence of
            #      instructions, or 0 if there isn't one. This is a byte
            #      offset from the landing pad base. This is encoded using
            #      the encoding from the header.
            # act: The action to take, an unsigned LEB128.
            #      This is 1 plus a byte offset into the action table.
            #      The value zero means that there is no action.
            site_start = self.fstart + entry["cs_start"]
            site_end = site_start + entry["cs_len"]
            landing_pad = self.fstart + entry["cs_lp"]

            # Addresses at or past the end of the function get an extra "E"
            # in their label name.
            cbw_e = "E" if site_end >= function_end else ""
            jlo_e = "E" if landing_pad >= function_end else ""

            cse = "\t.uleb128 .LC%x-.L%x    \t# Call Site Entry (%u)" % (
                site_start, self.fstart, entry["cs_start"])
            cbw = "\t.uleb128 .LC%s%x-.LC%x \t# Call Between (%u)" % (
                cbw_e, site_end, site_start, entry["cs_len"])
            jlo = "\t.uleb128 .LC%s%x-.L%x  \t# Jump Location (%u)" % (
                jlo_e, landing_pad, self.fstart, entry["cs_lp"])
            act = "\t.uleb128 0x%x          \t# Action\n" % (
                entry["cs_action"])
            return "\n".join([cse, cbw, jlo, act])

        return "\n".join(map(callsite_ftr, self.entries))

    def generate_typetable(self):
        """
        Return the type table as assembly.

        Entries are emitted in reverse insertion order and labelled
        ``<prefix>E1``, ``<prefix>E2``, ...; a zero address is emitted as a
        literal ``0``, any other as a ``.LC<addr>-.`` expression.
        """
        chunks = []
        ordered = reversed(list(self.ttentries.items()))
        for i, (idx, tp) in enumerate(ordered, start=1):
            print("*******", idx)
            label = "%sE%s" % (self.ttable_prefix_label, i)

            if tp["address"] != 0:
                target_label = ".LC%x-." % (tp["address"])
            else:
                target_label = "0"

            chunks.append("""
%s:
     .quad %s
            """ % (label, target_label))

        return "".join(chunks)

    def generate_actions(self):
        """
        Return the action table as assembly: a leading newline followed by
        two ``.byte`` lines (filter, next) per parsed action record.
        """
        record_template = ("# Action Filter and Next Record\n"
                           "    .byte %u\n"
                           "    .byte %u\n"
                           "\n")
        records = [
            record_template % (action["act_filter"], action["act_next"])
            for action in self.actions
        ]
        return "\n" + "".join(records)

    def generate_footer(self):
        """Return the label line that closes the LSDA."""
        return "%s:\n" % self.end_label