def __init__(self, elffile, fileoffset, fstart, container):
    """
    Initialise the table state and parse the LSDA right away.

    elffile:
        Stored as self.elf (presumably the stream the LSDA is read
        from -- confirm against _parse_lsda).
    fileoffset:
        Stored as self.lsda_offset.
    fstart:
        Key into container.functions identifying the owning function;
        also stored as self.fstart.
    container:
        Object whose functions[fstart].sz supplies the function size.
    """
    # Inputs.
    self.elf = elffile
    self.lsda_offset = fileoffset
    self.fstart = fstart
    self.sz = container.functions[fstart].sz

    # Field constructors: little-endian, 64-bit DWARF, 8-byte addresses.
    self.entry_structs = DWARFStructs(
        little_endian=True, dwarf_format=64, address_size=8)
    self._formats = self._eh_encoding_to_field(self.entry_structs)

    # Containers filled in while parsing.
    self.entries = []
    self.actions = []
    self.ttentries = OrderedDict()
    self.typetable_offset_present = False

    self._parse_lsda()
def __init__(self, stream, bits, little_endian=True):
    """
    Prepare a parser reading from *stream*.

    stream:
        Stored as self.stream.
    bits:
        Either 32 or 64. It is used both as the DWARF format and, divided
        by eight, as the address size in bytes.
    little_endian:
        Byte order handed to DWARFStructs.

    Raises ValueError when *bits* is neither 32 nor 64.
    """
    self.address = None
    self.base_offset = None
    self.stream = stream

    if bits not in (32, 64):
        raise ValueError(
            "Unsupported bits value %d. Expect either 32 or 64." % bits)

    self.entry_structs = DWARFStructs(
        little_endian=little_endian,
        dwarf_format=bits,
        address_size=bits // 8)
    self._formats = self._eh_encoding_to_field(self.entry_structs)
def test_lineprog_header(self):
    """ Parse a hand-assembled line program header and check its fields.
    """
    structs = DWARFStructs(
        little_endian=True, dwarf_format=32, address_size=4)
    raw_header = b''.join((
        b'\x04\x10\x00\x00',    # initial length
        b'\x05\x02',            # version
        b'\x20\x00\x00\x00',    # header length
        b'\x05\x10\x40\x50',    # until and including line_range
        b'\x06',                # opcode_base
        b'\x00\x01\x04\x08\x0C',  # standard_opcode_lengths
        # 2 dir names followed by a NULL
        b'\x61\x62\x00\x70\x00\x00',
        # a file entry
        b'\x61\x72\x00\x0C\x0D\x0F',
        # and another entry
        b'\x45\x50\x51\x00\x86\x12\x07\x08',
        # followed by NULL
        b'\x00',
    ))
    header = structs.Dwarf_lineprog_header.parse(raw_header)

    self.assertEqual(header.version, 0x205)
    self.assertEqual(header.opcode_base, 6)
    self.assertEqual(header.standard_opcode_lengths, [0, 1, 4, 8, 12])
    self.assertEqual(header.include_directory, [b'ab', b'p'])

    files = header.file_entry
    self.assertEqual(len(files), 2)
    self.assertEqual(files[0].name, b'ar')
    self.assertEqual(files[1].name, b'EPQ')
    self.assertEqual(files[1].dir_index, 0x12 * 128 + 6)
class TestParseExpr(unittest.TestCase):
    """ DWARFExprParser on expressions holding a single operation.
    """
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        set_global_machine_arch('x64')

    def test_single(self):
        parser = DWARFExprParser(self.structs32)

        # Operation without operands.
        div_op = DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])
        parsed = parser.parse_expr([0x1b])
        self.assertEqual(parsed, [div_op])

        # Operation with one operand.
        regx_op = DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])
        parsed = parser.parse_expr([0x90, 16])
        self.assertEqual(parsed, [regx_op])

        parsed = parser.parse_expr([0xe0])
        self.assertEqual(len(parsed), 1)
        # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so
        # check for both to prevent non-determinism.
        acceptable = [
            DWARFExprOp(
                op=0xe0, op_name='DW_OP_GNU_push_tls_address', args=[]),
            DWARFExprOp(op=0xe0, op_name='DW_OP_lo_user', args=[]),
        ]
        self.assertIn(parsed[0], acceptable)
class TestExprDumper(unittest.TestCase):
    """ ExprDumper output, driven through process_expr() and get_str().
    """
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        self.visitor = ExprDumper(self.structs32)
        set_global_machine_arch('x64')

    def _check_dumps(self, cases):
        """ Dump each (expression, expected text) pair in turn.

            The first pair uses the visitor created by the test framework's
            own setUp() call; every later pair gets a fresh visitor by
            calling setUp() again first.
        """
        for position, (expr, expected) in enumerate(cases):
            if position:
                self.setUp()
            self.visitor.process_expr(expr)
            self.assertEqual(self.visitor.get_str(), expected)

    def test_basic_single(self):
        self._check_dumps([
            ([0x1b], 'DW_OP_div'),
            ([0x74, 0x82, 0x01], 'DW_OP_breg4 (rsi): 130'),
            ([0x91, 0x82, 0x01], 'DW_OP_fbreg: 130'),
            ([0x51], 'DW_OP_reg1 (rdx)'),
            ([0x90, 16], 'DW_OP_regx: 16 (rip)'),
            ([0x9d, 0x8f, 0x0A, 0x90, 0x01], 'DW_OP_bit_piece: 1295 144'),
        ])

    def test_basic_sequence(self):
        self._check_dumps([
            ([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06],
             'DW_OP_addr: 201; DW_OP_deref; DW_OP_deref'),
            ([0x15, 0xFF, 0x0b, 0xf1, 0xff],
             'DW_OP_pick: 255; DW_OP_const2s: -15'),
            ([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e],
             'DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul'),
        ])
def process_compile_unit(dwarf_info, pyelftools_elf_file, compile_unit):
    """
    Process every DIE of one compile unit.

    dwarf_info:
        Not used by this function.
    pyelftools_elf_file:
        Supplies the byte order (its little_endian attribute).
    compile_unit:
        Supplies the DWARF format, the address size and the DIEs.
    """
    # These structs are needed to parse the DW_AT_location of
    # DW_TAG_variable DIEs. They are rebuilt for every compile unit
    # (sharing them reportedly caused errors).
    cu_structs = DWARFStructs(
        little_endian=pyelftools_elf_file.little_endian,
        dwarf_format=compile_unit.dwarf_format(),
        address_size=compile_unit['address_size'],
    )

    get_compile_unit_types(compile_unit)

    # Walk all DIEs of the CU and handle them one at a time.
    for die in compile_unit.iter_DIEs():
        process_die(die, cu_structs, compile_unit)
class TestExprDumper(unittest.TestCase):
    """ ExprDumper output, driven through dump_expr().
    """
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        self.visitor = ExprDumper(self.structs32)
        set_global_machine_arch('x64')

    def test_basic_single(self):
        cases = (
            ([0x1b], 'DW_OP_div'),
            ([0x74, 0x82, 0x01], 'DW_OP_breg4 (rsi): 130'),
            ([0x91, 0x82, 0x01], 'DW_OP_fbreg: 130'),
            ([0x51], 'DW_OP_reg1 (rdx)'),
            ([0x90, 16], 'DW_OP_regx: 16 (rip)'),
            ([0x9d, 0x8f, 0x0A, 0x90, 0x01], 'DW_OP_bit_piece: 1295 144'),
            ([0x0e, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00],
             'DW_OP_const8u: 71777214294589695'),
        )
        for expr, expected in cases:
            self.assertEqual(self.visitor.dump_expr(expr), expected)

    def test_basic_sequence(self):
        cases = (
            ([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06],
             'DW_OP_addr: 201; DW_OP_deref; DW_OP_deref'),
            ([0x15, 0xFF, 0x0b, 0xf1, 0xff],
             'DW_OP_pick: 255; DW_OP_const2s: -15'),
            ([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e],
             'DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul'),
        )
        for expr, expected in cases:
            self.assertEqual(self.visitor.dump_expr(expr), expected)

        # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so
        # check for both to prevent non-determinism.
        dumped = self.visitor.dump_expr([0x08, 0x0f, 0xe0])
        self.assertIn(
            dumped,
            ('DW_OP_const1u: 15; DW_OP_GNU_push_tls_address',
             'DW_OP_const1u: 15; DW_OP_lo_user'))
def test_ehframe_fde_with_lsda_pointer(self):
    """ An .eh_frame CIE/FDE pair whose FDE carries an LSDA pointer.
    """
    # CIE and FDE dumped from exceptions_0, offset 0xcc0
    # binary is at https://github.com/angr/binaries/blob/master/tests/x86_64/exceptions_0
    cie_bytes = b''.join((
        b'\x1c\x00\x00\x00',        # length
        b'\x00\x00\x00\x00',        # ID
        b'\x01',                    # version
        b'\x7a\x50\x4c\x52\x00',    # augmentation string
        b'\x01',                    # code alignment
        b'\x78',                    # data alignment
        b'\x10',                    # return address register
        b'\x07',                    # augmentation data length
        b'\x9b',                    # personality function pointer encoding
        b'\x3d\x13\x20\x00',        # personality function pointer
        b'\x1b',                    # LSDA pointer encoding
        b'\x1b',                    # FDE encoding
        b'\x0c\x07\x08\x90',        # initial instructions
        b'\x01\x00\x00',
    ))
    fde_bytes = b''.join((
        b'\x24\x00\x00\x00',        # length
        b'\x24\x00\x00\x00',        # CIE reference pointer
        b'\x62\xfd\xff\xff',        # pc begin
        b'\x89\x00\x00\x00',        # pc range
        b'\x04',                    # augmentation data length
        b'\xb7\x00\x00\x00',        # LSDA pointer
        b'\x41\x0e\x10\x86',        # initial instructions
        b'\x02\x43\x0d\x06',
        b'\x45\x83\x03\x02',
        b'\x7f\x0c\x07\x08',
        b'\x00\x00\x00',
    ))
    data = cie_bytes + fde_bytes

    stream = BytesIO(data)
    structs = DWARFStructs(
        little_endian=True, dwarf_format=32, address_size=8)
    cfi = CallFrameInfo(stream, len(data), 0, structs, for_eh_frame=True)
    entries = cfi.get_entries()

    self.assertEqual(len(entries), 2)
    self.assertIsInstance(entries[0], CIE)
    self.assertIn('LSDA_encoding', entries[0].augmentation_dict)

    # check LSDA encoding: low nibble is the value format, high nibble
    # the modifier
    lsda_encoding = entries[0].augmentation_dict['LSDA_encoding']
    self.assertEqual(
        lsda_encoding & 0x0f, DW_EH_encoding_flags['DW_EH_PE_sdata4'])
    self.assertEqual(
        lsda_encoding & 0xf0, DW_EH_encoding_flags['DW_EH_PE_pcrel'])

    self.assertIsInstance(entries[1], FDE)
    self.assertEqual(entries[1].lsda_pointer, 232)
def test_describe_CFI_instructions(self):
    """ Textual description of the instructions of a lone CIE.
    """
    # The data here represents a single CIE
    data = b''.join((
        b'\x16\x00\x00\x00',    # length
        b'\xff\xff\xff\xff',    # CIE_id
        b'\x03\x00\x04\x7c',    # version, augmentation, caf, daf
        b'\x08',                # return address
        b'\x0c\x07\x02',
        b'\x10\x02\x07\x03\x01\x02\x00\x00\x06\x06',
    ))
    stream = BytesIO(data)

    structs = DWARFStructs(
        little_endian=True, dwarf_format=32, address_size=4)
    cfi = CallFrameInfo(stream, len(data), structs)
    entries = cfi.get_entries()

    set_global_machine_arch('x86')
    expected = ''.join((
        ' DW_CFA_def_cfa: r7 (edi) ofs 2\n',
        ' DW_CFA_expression: r2 (edx) (DW_OP_addr: 201; DW_OP_deref; DW_OP_deref)\n',
    ))
    self.assertEqual(describe_CFI_instructions(entries[0]), expected)
def _make_program_in_stream(self, stream):
    """ Build a LineProgram over *stream* using a fixed, hand-assembled
        header.

        The program spans the stream from offset 0 up to the length of
        stream.getvalue().
    """
    structs = DWARFStructs(
        little_endian=True, dwarf_format=32, address_size=4)
    raw_header = b''.join((
        b'\x04\x10\x00\x00',    # initial length
        b'\x03\x00',            # version
        b'\x20\x00\x00\x00',    # header length
        b'\x01\x01\x01\x0F',    # flags
        b'\x0A',                # opcode_base
        b'\x00\x01\x04\x08\x0C\x01\x01\x01\x00',  # standard_opcode_lengths
        # 2 dir names followed by a NULL
        b'\x61\x62\x00\x70\x00\x00',
        # a file entry
        b'\x61\x72\x00\x0C\x0D\x0F',
        # and another entry
        b'\x45\x50\x51\x00\x86\x12\x07\x08',
        # followed by NULL
        b'\x00',
    ))
    header = structs.Dwarf_lineprog_header.parse(raw_header)

    program_end = len(stream.getvalue())
    return LineProgram(header, stream, structs, 0, program_end)
class TestParseExpr(unittest.TestCase):
    """ DWARFExprParser on expressions holding a single operation.
    """
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        set_global_machine_arch('x64')

    def test_single(self):
        parser = DWARFExprParser(self.structs32)

        # (encoded expression, the single operation expected back)
        cases = (
            ([0x1b],
             DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])),
            ([0x90, 16],
             DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])),
            ([0xe0],
             DWARFExprOp(
                 op=0xe0, op_name='DW_OP_GNU_push_tls_address', args=[])),
        )
        for encoded, expected_op in cases:
            self.assertEqual(parser.parse_expr(encoded), [expected_op])
def __init__(self, elffile):
    """
    Load the `.debug` section of *elffile* (and `.line`, when present)
    into in-memory streams and set up the DWARF parsing structures.

    Attributes set:
        section_size  -- length in bytes of the `.debug` section data
        stm           -- BytesIO over the `.debug` data, positioned at 0
        linestream    -- BytesIO over the `.line` data positioned at 0,
                         or None when the file has no `.line` section
        config        -- DwarfConfig built from the file's endianness,
                         ELF class and machine architecture
        structs       -- 32-bit-format DWARFStructs matching that config

    elffile:
        Object providing get_section_by_name(), little_endian, elfclass
        and get_machine_arch().
    """
    # NOTE(review): a file without a `.debug` section presumably makes
    # get_section_by_name() return None, so the .data() call below would
    # raise AttributeError -- left as is, confirm whether callers rely
    # on it.
    section = elffile.get_section_by_name(".debug")
    section_data = section.data()
    self.section_size = len(section_data)
    self.stm = BytesIO(section_data)

    # Always define the attribute so later code can test it against None
    # instead of hitting AttributeError when `.line` is missing.
    lsection = elffile.get_section_by_name(".line")
    self.linestream = BytesIO(lsection.data()) if lsection else None

    self.config = DwarfConfig(
        little_endian=elffile.little_endian,
        default_address_size=elffile.elfclass // 8,
        machine_arch=elffile.get_machine_arch())

    self.structs = DWARFStructs(
        little_endian=self.config.little_endian,
        dwarf_format=32,
        address_size=self.config.default_address_size)
def test_spec_sample_d6(self):
    """ Parse and decode the D.6 sample from the DWARFv3 spec.
    """
    # first comes the CIE
    cie_bytes = b''.join((
        b'\x20\x00\x00\x00',    # length
        b'\xff\xff\xff\xff',    # CIE_id
        b'\x03\x00\x04\x7c',    # version, augmentation, caf, daf
        b'\x08',                # return address
        b'\x0c\x07\x00',
        b'\x08\x00',
        b'\x07\x01',
        b'\x07\x02',
        b'\x07\x03',
        b'\x08\x04',
        b'\x08\x05',
        b'\x08\x06',
        b'\x08\x07',
        b'\x09\x08\x01',
        b'\x00',
    ))
    # then comes the FDE
    fde_bytes = b''.join((
        b'\x28\x00\x00\x00',    # length
        b'\x00\x00\x00\x00',    # CIE_pointer (to CIE at 0)
        b'\x44\x33\x22\x11',    # initial_location
        b'\x54\x00\x00\x00',    # address range
        b'\x41',
        b'\x0e\x0c', b'\x41',
        b'\x88\x01', b'\x41',
        b'\x86\x02', b'\x41',
        b'\x0d\x06', b'\x41',
        b'\x84\x03', b'\x4b',
        b'\xc4', b'\x41',
        b'\xc6',
        b'\x0d\x07', b'\x41',
        b'\xc8', b'\x41',
        b'\x0e\x00',
        b'\x00\x00',
    ))
    data = cie_bytes + fde_bytes

    s = BytesIO()
    s.write(data)

    structs = DWARFStructs(
        little_endian=True, dwarf_format=32, address_size=4)
    cfi = CallFrameInfo(s, len(data), structs)
    entries = cfi.get_entries()

    self.assertEqual(len(entries), 2)
    cie = entries[0]
    fde = entries[1]

    # The CIE header and its instructions.
    self.assertIsInstance(cie, CIE)
    self.assertEqual(cie['length'], 32)
    self.assertEqual(cie['data_alignment_factor'], -4)
    self.assertEqual(cie['return_address_register'], 8)
    self.assertEqual(len(cie.instructions), 11)
    self.assertInstruction(cie.instructions[0], 'DW_CFA_def_cfa', [7, 0])
    self.assertInstruction(cie.instructions[8], 'DW_CFA_same_value', [7])
    self.assertInstruction(cie.instructions[9], 'DW_CFA_register', [8, 1])

    # The FDE header and its instructions.
    self.assertTrue(isinstance(fde, FDE))
    self.assertEqual(fde['length'], 40)
    self.assertEqual(fde['CIE_pointer'], 0)
    self.assertEqual(fde['address_range'], 84)
    self.assertIs(fde.cie, cie)
    self.assertEqual(len(fde.instructions), 21)
    self.assertInstruction(fde.instructions[0], 'DW_CFA_advance_loc', [1])
    self.assertInstruction(
        fde.instructions[1], 'DW_CFA_def_cfa_offset', [12])
    self.assertInstruction(fde.instructions[9], 'DW_CFA_offset', [4, 3])
    self.assertInstruction(
        fde.instructions[18], 'DW_CFA_def_cfa_offset', [0])
    self.assertInstruction(fde.instructions[20], 'DW_CFA_nop', [])

    # Now let's decode it...
    decoded_CIE = cie.get_decoded()
    self.assertEqual(decoded_CIE.reg_order, list(range(9)))
    self.assertEqual(len(decoded_CIE.table), 1)
    cie_row = decoded_CIE.table[0]
    self.assertEqual(cie_row['cfa'].reg, 7)
    self.assertEqual(cie_row['pc'], 0)
    self.assertEqual(cie_row['cfa'].offset, 0)
    self.assertEqual(cie_row[4].type, RegisterRule.SAME_VALUE)
    self.assertEqual(cie_row[8].type, RegisterRule.REGISTER)
    self.assertEqual(cie_row[8].arg, 1)

    decoded_FDE = fde.get_decoded()
    rows = decoded_FDE.table
    base_pc = 0x11223344
    self.assertEqual(decoded_FDE.reg_order, list(range(9)))
    self.assertEqual(rows[0]['cfa'].reg, 7)
    self.assertEqual(rows[0]['cfa'].offset, 0)
    self.assertEqual(rows[0]['pc'], base_pc)
    self.assertEqual(rows[0][8].type, RegisterRule.REGISTER)
    self.assertEqual(rows[0][8].arg, 1)
    self.assertEqual(rows[1]['cfa'].reg, 7)
    self.assertEqual(rows[1]['cfa'].offset, 12)
    self.assertEqual(rows[2][8].type, RegisterRule.OFFSET)
    self.assertEqual(rows[2][8].arg, -4)
    self.assertEqual(rows[2][4].type, RegisterRule.SAME_VALUE)
    self.assertEqual(rows[5]['pc'], base_pc + 20)
    self.assertEqual(rows[5][4].type, RegisterRule.OFFSET)
    self.assertEqual(rows[5][4].arg, -12)
    self.assertEqual(rows[6]['pc'], base_pc + 64)
    self.assertEqual(rows[9]['pc'], base_pc + 76)
class LSDAExceptionTable:
    """
    LSDA exception table parser.

    Reads the LSDA header and its call site table from a stream.

    TODO: Much of this class should be eventually moved to pyelftools.
    """

    def __init__(self, stream, bits, little_endian=True):
        """
        stream:
            Seekable binary stream holding the LSDA data.
        bits:
            32 or 64; used as the DWARF format and (divided by eight) as
            the address size in bytes.
        little_endian:
            Byte order handed to DWARFStructs.

        Raises ValueError when *bits* is neither 32 nor 64.
        """
        self.address = None
        self.base_offset = None
        self.stream = stream

        if bits not in (32, 64):
            raise ValueError(
                "Unsupported bits value %d. Expect either 32 or 64." % bits)

        self.entry_structs = DWARFStructs(little_endian=little_endian,
                                          dwarf_format=bits,
                                          address_size=bits // 8)
        self._formats = self._eh_encoding_to_field(self.entry_structs)

    @staticmethod
    def _eh_encoding_to_field(entry_structs):
        """
        Shamelessly copied from pyelftools since the original method is a
        bound method.

        Return a mapping from basic encodings (DW_EH_encoding_flags) to the
        corresponding field constructors (for instance
        entry_structs.Dwarf_uint32).
        """
        return {
            DW_EH_encoding_flags['DW_EH_PE_absptr']:
                entry_structs.Dwarf_target_addr,
            DW_EH_encoding_flags['DW_EH_PE_uleb128']:
                entry_structs.Dwarf_uleb128,
            DW_EH_encoding_flags['DW_EH_PE_udata2']:
                entry_structs.Dwarf_uint16,
            DW_EH_encoding_flags['DW_EH_PE_udata4']:
                entry_structs.Dwarf_uint32,
            DW_EH_encoding_flags['DW_EH_PE_udata8']:
                entry_structs.Dwarf_uint64,
            DW_EH_encoding_flags['DW_EH_PE_sleb128']:
                entry_structs.Dwarf_sleb128,
            DW_EH_encoding_flags['DW_EH_PE_sdata2']:
                entry_structs.Dwarf_int16,
            DW_EH_encoding_flags['DW_EH_PE_sdata4']:
                entry_structs.Dwarf_int32,
            DW_EH_encoding_flags['DW_EH_PE_sdata8']:
                entry_structs.Dwarf_int64,
        }

    def parse_lsda(self, address, offset):
        """
        Parse the LSDA located at stream position *offset*.

        address:
            Address that corresponds to *offset*; used to resolve
            pc-relative values.
        offset:
            Position in the stream where the LSDA header starts.

        Returns the list of parsed call site entries.
        """
        self.address = address
        self.base_offset = offset
        self.stream.seek(offset)
        header = self._parse_lsda_header()

        csrs = []  # type: List[CallSiteEntry]
        start_offset = self.stream.tell()
        # The call site table length is in bytes, so keep reading records
        # until that many bytes have been consumed.
        while self.stream.tell() - start_offset < header.call_site_table_len:
            csr = self._parse_call_site_entry(header.call_site_encoding)
            if csr is not None:
                csrs.append(csr)
        return csrs

    def _parse_lsda_header(self):
        """
        Read the LSDA header at the current stream position.

        Returns an ExceptionTableHeader built from the LPStart value (None
        when omitted), the TType encoding and offset (offset None when
        omitted), the call site encoding and the call site table length.

        Raises NotImplementedError for LPStart modifiers other than none
        and DW_EH_PE_pcrel.
        """
        omit = DW_EH_encoding_flags['DW_EH_PE_omit']

        # lpstart
        lpstart_encoding = self.stream.read(1)[0]
        if lpstart_encoding != omit:
            base_encoding = lpstart_encoding & 0x0f
            modifier = lpstart_encoding & 0xf0

            # A pc-relative value is relative to the address of the encoded
            # field itself, so remember where the field starts before
            # reading advances the stream past it.
            field_offset = self.stream.tell()
            lpstart = struct_parse(
                Struct('dummy', self._formats[base_encoding]('LPStart')),
                self.stream)['LPStart']

            if modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
                lpstart += self.address + (field_offset - self.base_offset)
            elif modifier != 0:
                raise NotImplementedError(
                    "Unsupported modifier %#x." % modifier)
        else:
            lpstart = None

        # ttype
        ttype_encoding = self.stream.read(1)[0]
        if ttype_encoding != omit:
            ttype_offset = struct_parse(
                Struct('dummy', self.entry_structs.Dwarf_uleb128('TType')),
                self.stream)['TType']
        else:
            ttype_offset = None

        # call site table length
        cstable_encoding = self.stream.read(1)[0]
        cstable_length = struct_parse(
            Struct('dummy', self.entry_structs.Dwarf_uleb128('CSTable')),
            self.stream)['CSTable']

        return ExceptionTableHeader(
            lpstart,
            ttype_encoding,
            ttype_offset,
            cstable_encoding,
            cstable_length,
        )

    def _parse_call_site_entry(self, encoding):
        """
        Read one call site record at the current stream position.

        encoding:
            Call site encoding byte from the header; the low nibble selects
            the field format for start, length and landing pad, the high
            nibble is the modifier.

        Returns a CallSiteEntry. Raises NotImplementedError when the
        modifier is non-zero (the record has been consumed from the stream
        by then).
        """
        base_encoding = encoding & 0x0f
        modifier = encoding & 0xf0

        # header
        field = self._formats[base_encoding]
        s = struct_parse(
            Struct(
                'CallSiteEntry',
                field('cs_start'),
                field('cs_len'),
                field('cs_lp'),
                self.entry_structs.Dwarf_uleb128('cs_action'),
            ),
            self.stream)

        if modifier != 0:
            raise NotImplementedError(
                "Unsupported modifier for CallSiteEntry: %#x." % modifier)

        return CallSiteEntry(
            s['cs_start'], s['cs_len'], s['cs_lp'], s['cs_action'])
class TestParseExpr(unittest.TestCase):
    """ DWARFExprParser results, including the byte offset of each
        operation.
    """
    structs32 = DWARFStructs(
        little_endian=True,
        dwarf_format=32,
        address_size=4)

    def setUp(self):
        set_global_machine_arch('x64')

    def test_single(self):
        parser = DWARFExprParser(self.structs32)

        parsed = parser.parse_expr([0x1b])
        self.assertEqual(parsed, [
            DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[], offset=0),
        ])

        parsed = parser.parse_expr([0x90, 16])
        self.assertEqual(parsed, [
            DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16], offset=0),
        ])

        parsed = parser.parse_expr([0xe0])
        self.assertEqual(len(parsed), 1)
        # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so
        # check for both to prevent non-determinism.
        acceptable = [
            DWARFExprOp(op=0xe0, op_name=op_name, args=[], offset=0)
            for op_name in ('DW_OP_GNU_push_tls_address', 'DW_OP_lo_user')
        ]
        self.assertIn(parsed[0], acceptable)

        # Real life example:
        #   push_object_address
        #   deref
        #   dup
        #   bra 4
        #   lit0
        #   skip 3
        #   lit4
        #   minus
        #   deref
        encoded = [
            0x97,
            0x6,
            0x12,
            0x28, 0x4, 0x0,
            0x30,
            0x2F, 0x3, 0x0,
            0x34,
            0x1C,
            0x6,
        ]
        # (opcode, name, args, offset of the opcode within the expression)
        expected_fields = [
            (0x97, 'DW_OP_push_object_address', [], 0),
            (0x6, 'DW_OP_deref', [], 1),
            (0x12, 'DW_OP_dup', [], 2),
            (0x28, 'DW_OP_bra', [4], 3),
            (0x30, 'DW_OP_lit0', [], 6),
            (0x2f, 'DW_OP_skip', [3], 7),
            (0x34, 'DW_OP_lit4', [], 10),
            (0x1c, 'DW_OP_minus', [], 11),
            (0x6, 'DW_OP_deref', [], 12),
        ]
        expected = [
            DWARFExprOp(op=op, op_name=name, args=args, offset=offset)
            for op, name, args, offset in expected_fields
        ]

        parsed = parser.parse_expr(encoded)
        self.assertEqual(len(parsed), 9)
        self.assertEqual(parsed, expected)
class LSDATable():
    """
    The LSDA Table in GCC-Frontend Compilers (All GCC Languages) implements
    the LSDA using __gxx_personality_v0. Thus this should work for all
    GCC-languages we care about, but that isn't guaranteed.

    An instance parses one LSDA (header, call site table, action records
    and the referenced type table slots) on construction and can then
    re-emit it as assembly text through the generate_* methods.
    """

    def __init__(self, elffile, fileoffset, fstart, container):
        """
        elffile:
            Seekable binary stream the LSDA is read from.
        fileoffset:
            Position of the LSDA within that stream.
        fstart:
            Key of the owning function in container.functions; also used
            as a number to build labels and call site addresses.
        container:
            Object whose functions[fstart].sz gives the function size.
        """
        self.elf = elffile
        self.lsda_offset = fileoffset
        # Little-endian, 64-bit DWARF format, 8-byte addresses.
        self.entry_structs = DWARFStructs(
            little_endian=True, dwarf_format=64, address_size=8)
        self.entries = []
        self.fstart = fstart
        self.sz = container.functions[fstart].sz
        self._formats = self._eh_encoding_to_field(self.entry_structs)
        self.actions = []
        self.ttentries = OrderedDict()
        self.typetable_offset_present = False
        self._parse_lsda()

    @staticmethod
    def _eh_encoding_to_field(entry_structs):
        """
        Shamelessly copied from pyelftools since the original method is a
        bound method.

        Return a mapping from basic encodings (DW_EH_encoding_flags) to the
        corresponding field constructors (for instance
        entry_structs.Dwarf_uint32).
        """
        return {
            DW_EH_encoding_flags['DW_EH_PE_absptr']:
                entry_structs.Dwarf_target_addr,
            DW_EH_encoding_flags['DW_EH_PE_uleb128']:
                entry_structs.Dwarf_uleb128,
            DW_EH_encoding_flags['DW_EH_PE_udata2']:
                entry_structs.Dwarf_uint16,
            DW_EH_encoding_flags['DW_EH_PE_udata4']:
                entry_structs.Dwarf_uint32,
            DW_EH_encoding_flags['DW_EH_PE_udata8']:
                entry_structs.Dwarf_uint64,
            DW_EH_encoding_flags['DW_EH_PE_sleb128']:
                entry_structs.Dwarf_sleb128,
            DW_EH_encoding_flags['DW_EH_PE_sdata2']:
                entry_structs.Dwarf_int16,
            DW_EH_encoding_flags['DW_EH_PE_sdata4']:
                entry_structs.Dwarf_int32,
            DW_EH_encoding_flags['DW_EH_PE_sdata8']:
                entry_structs.Dwarf_int64,
        }

    def _parse_lsda(self):
        """Parse the header first, then the tables that follow it."""
        self._parse_lsda_header()
        self._parse_lsda_entries()

    def _parse_lsda_header(self):
        """
        Read the LSDA header and derive the labels used when re-emitting.

        Sets self.typetable_offset, self.typetable_offset_present, the
        *_label attributes and self.header.

        Raises NotImplementedError when the LPStart encoding is anything
        other than DW_EH_PE_omit.
        """
        # https://www.airs.com/blog/archives/464
        omit = DW_EH_encoding_flags['DW_EH_PE_omit']
        self.elf.seek(self.lsda_offset)

        lpstart_raw = self.elf.read(1)[0]
        if lpstart_raw != omit:
            # See https://www.airs.com/blog/archives/460, it should be omit
            # in practice
            raise NotImplementedError("We do not handle this case for now")
        # LPStart is omitted whenever we get here, so there is no value.
        lpstart = None

        typetable_encoding = self.elf.read(1)[0]
        # NOW TODO : the encoding is the right one + 1, which is weird
        self.typetable_offset = 0
        self.typetable_offset_present = typetable_encoding != omit
        if self.typetable_offset_present:
            self.typetable_offset = struct_parse(
                Struct('dummy', self.entry_structs.Dwarf_uleb128('TType')),
                self.elf)['TType']

        call_site_table_encoding = self.elf.read(1)[0]
        call_site_table_len = struct_parse(
            Struct('dummy', self.entry_structs.Dwarf_uleb128('CSTable')),
            self.elf)['CSTable']

        # Without a type table offset, fall back to the call site table
        # length as the total length.
        if not self.typetable_offset_present:
            self.typetable_offset = call_site_table_len

        print("lpstart", lpstart_raw)
        print("lpstart_pcrel", lpstart)
        print("typetable_encoding", typetable_encoding)
        print("call_site_table_encoding", call_site_table_encoding)
        print("CALL SITE TABLE LENGTH %d" % call_site_table_len)
        print("TYPETABLE OFFSET %d" % self.typetable_offset)

        self.end_label = ".LLSDATT%x" % self.fstart
        self.table_label = ".LLSDATTD%x" % self.fstart
        self.action_label = ".LLSDACSE%x" % self.fstart
        self.callsite_label = ".LLSDACSB%x" % self.fstart
        self.ttable_prefix_label = ".LLSDATYP%x" % self.fstart

        # Need to construct some representation here.
        self.header = {
            "lpstart": lpstart_raw,
            "encoding": call_site_table_encoding,
            "typetable_encoding": typetable_encoding,
            "call_site_table_len": call_site_table_len,
            # Don't forget to check typetable_offset_present
            "cs_act_tt_total_len": self.typetable_offset,
        }

    def _parse_lsda_entries(self):
        """
        Read the call site table, the action records and the type table
        slots the actions refer to.

        Fills self.entries, self.actions and self.ttentries.
        """
        start_cs_offset = self.elf.tell()
        table_len = self.header["call_site_table_len"]

        # The record layout only depends on the header encoding, so build
        # it once. (The modifier nibble of the encoding is not examined.)
        # Maybe we need to store the offset in the entry ?
        field = self._formats[self.header["encoding"] & 0x0f]
        call_site_struct = Struct(
            'CallSiteEntry',
            field('cs_start'),
            field('cs_len'),
            field('cs_lp'),
            self.entry_structs.Dwarf_uleb128('cs_action'),
        )

        action_count = 0
        while self.elf.tell() - start_cs_offset < table_len:
            s = struct_parse(call_site_struct, self.elf)
            cs_action = s['cs_action']
            if cs_action != 0:
                # cs_action is 1 + byte offset into the action table and
                # each action record is read as two bytes below, so this
                # is the number of records needed to reach that action.
                action_count = max(action_count, (cs_action + 1) >> 1)
            self.entries.append(s)

        processed_bytes = self.elf.tell() - start_cs_offset
        print("+++++ %d bytes read, %d to go, %d actions" %
              (processed_bytes, self.typetable_offset - processed_bytes,
               action_count))

        # NOTE(review): both action fields are read as single unsigned
        # bytes; larger or negative filter values would not be handled --
        # confirm this is sufficient for the inputs seen.
        action_struct = Struct(
            "ActionEntry",
            self.entry_structs.Dwarf_uint8('act_filter'),
            self.entry_structs.Dwarf_uint8('act_next'))
        for _ in range(action_count):
            action = struct_parse(action_struct, self.elf)
            print(">>>> ACTION ", action)
            self.actions.append(action)

        # The type table is indexed backwards from its end in 8-byte slots.
        ttendloc = self.lsda_offset + self.typetable_offset + 3
        print("TT %x" % (ttendloc))
        for action in self.actions:
            ttentryloc = ttendloc + action.act_filter * -8
            print("****** TT LOC: %x" % (ttentryloc))
            self.elf.seek(ttentryloc, io.SEEK_SET)
            ptr = struct.unpack("<Q", self.elf.read(8))[0]
            print("****** TT PTR: %x" % ptr)
            # A zero slot stays zero; anything else is taken relative to
            # the location of the slot itself.
            symbolized_target = ptr + ttentryloc if ptr != 0 else 0
            self.ttentries[action.act_filter] = {'address': symbolized_target}
            print("****** TT x: %x" % (symbolized_target))

    def generate_tableoffset(self):
        """
        Return the line for the type table offset: a .uleb128 label
        difference when an offset was present in the header, otherwise an
        explanatory comment line.
        """
        if self.typetable_offset_present:
            return ".uleb128 %s-%s" % (self.end_label, self.table_label)
        return "# @TType Encoding is DW_EH_PE_omit, ignoring."

    def generate_header(self):
        """Return the assembly text that opens the exception table."""
        print("generate header", self.fstart)
        print("generate table label", self.table_label)
        ttoffset = self.generate_tableoffset()
        table_header = """
.LFE%x:
    .section .gcc_except_table,"a",@progbits
    .align 4
GCC_except_table%x:
.LLSDA%x:
    .byte 0x%x # @LPStart encoding
    .byte 0x%x # @TType Encoding
    %s
.LLSDATTD%x:
    .byte 0x1
""" % (
            self.fstart,
            self.fstart,
            self.fstart,
            self.header["lpstart"],
            self.header["typetable_encoding"],
            ttoffset,
            self.fstart,
        )
        return table_header

    def generate_table(self):
        """
        Return the assembly text for the call site table, the action
        records and the type table, in that order.
        """
        print("generate table", self.fstart)
        table = """
    .uleb128 %s-%s
.LLSDACSB%x:
%s
.LLSDACSE%x:
%s
    .align 4
%s
.LLSDATT%x:
    .p2align 2
""" % (self.action_label, self.callsite_label, self.fstart,
       self.generate_callsites(), self.fstart, self.generate_actions(),
       self.generate_typetable(), self.fstart)
        return table

    def generate_callsites(self):
        """
        Return the call site records as .uleb128 lines.

        Each record has four fields:
          cse: The start of the instructions for the current call site, a
               byte offset from the landing pad base.
          cbw: The length of the instructions for the current call site,
               in bytes.
          jlo: A pointer to the landing pad for this sequence of
               instructions, or 0 if there isn't one. This is a byte
               offset from the landing pad base.
          act: The action to take, an unsigned LEB128. This is 1 plus a
               byte offset into the action table. The value zero means
               that there is no action.
        """
        #.LLSDACSB2:
        #   .uleb128 .LEHB4-.LFB2   ; uint8_t start
        #   .uleb128 .LEHE4-.LEHB4  ; uint8_t len
        #   .uleb128 .L19-.LFB2     ; uint8_t lp
        #   .uleb128 0x3            ; uint8_t action
        function_end = self.sz + self.fstart

        def callsite_ftr(entry):
            start = self.fstart + entry["cs_start"]
            end = start + entry["cs_len"]
            landing_pad = self.fstart + entry["cs_lp"]

            # Addresses at or past the end of the function get an "E"
            # in their label.
            cbw_e = "E" if end >= function_end else ""
            jlo_e = "E" if landing_pad >= function_end else ""

            cse = "\t.uleb128 .LC%x-.L%x \t# Call Site Entry (%u)" % (
                start, self.fstart, entry["cs_start"])
            cbw = "\t.uleb128 .LC%s%x-.LC%x \t# Call Between (%u)" % (
                cbw_e, end, start, entry["cs_len"])
            jlo = "\t.uleb128 .LC%s%x-.L%x \t# Jump Location (%u)" % (
                jlo_e, landing_pad, self.fstart, entry["cs_lp"])
            act = "\t.uleb128 0x%x \t# Action\n" % (entry["cs_action"])
            return "\n".join([cse, cbw, jlo, act])

        return "\n".join(map(callsite_ftr, self.entries))

    def generate_typetable(self):
        """
        Return the type table as labelled .quad lines, walking the parsed
        entries in reverse insertion order and numbering the labels from 1.
        """
        chunks = []
        reversed_entries = reversed(list(self.ttentries.items()))
        for position, (idx, tp) in enumerate(reversed_entries, start=1):
            print("*******", idx)
            label = "%sE%s" % (self.ttable_prefix_label, position)
            if tp["address"] != 0:
                # Emitted relative to the current location (".").
                target_label = ".LC%x-." % (tp["address"])
            else:
                target_label = "0"
            chunks.append("\n%s:\n    .quad %s\n" % (label, target_label))
        return "".join(chunks)

    def generate_actions(self):
        """Return the action records, two .byte lines per record."""
        lines = ["\n"]
        for action in self.actions:
            lines.append("# Action Filter and Next Record\n")
            lines.append(" .byte %u\n" % (action["act_filter"]))
            lines.append(" .byte %u\n" % (action["act_next"]))
        lines.append("\n")
        return "".join(lines)

    def generate_footer(self):
        """Return the line defining the end-of-table label."""
        return "%s:\n" % self.end_label