Esempio n. 1
0
    def assertImageContainsSection(self, image, elf, section_name):
        """
        Assert that the bytes of one ELF section appear in a binary image.

        The ELF file at path `elf` is opened and `section_name` is looked up.
        `image.segments` is then walked in ascending `addr` order; every
        segment that starts exactly at the address of the not-yet-matched
        part of the section is compared against it and the matched bytes are
        consumed.  The assertion fails if compared bytes differ, or if any
        section bytes remain unmatched after all segments were visited.
        """
        with open(elf, "rb") as elf_stream:
            elf_obj = ELFFile(elf_stream)
            section = elf_obj.get_section_by_name(section_name)
            self.assertTrue(section, "%s should be in the ELF" % section_name)
            next_addr = section.header.sh_addr
            remaining = section.data()
            # The section may be split over several image segments, so the
            # matched prefix is stripped from 'remaining' segment by segment.
            ordered_segments = sorted(image.segments, key=lambda s: s.addr)
            for segment in ordered_segments:
                print("comparing seg 0x%x sec 0x%x len 0x%x" % (segment.addr, next_addr, len(remaining)))
                if segment.addr != next_addr:
                    continue
                matched = min(len(segment.data), len(remaining))
                self.assertEqual(remaining[:matched], segment.data[:matched],
                                 "ELF '%s' section has mis-matching binary image data" % section_name)
                next_addr += matched
                remaining = remaining[matched:]

            # every byte of the section must have been accounted for
            self.assertEqual(0, len(remaining),
                             "ELF %s section '%s' has no encompassing segment(s) in binary image (image segments: %s)"
                             % (elf, section_name, image.segments))
Esempio n. 2
0
	def analyze(self, pkginfo, tar):
		"""
		Record a warning for every ELF entry of `tar` with an executable stack.

		Each entry is passed to `_test_elf_and_extract`; entries for which it
		returns a falsy value are skipped.  Otherwise the returned temporary
		path is parsed as ELF and its PT_GNU_STACK segments are inspected.
		`self.warnings` is only assigned when at least one entry matched.

		`pkginfo` is accepted but not used by this method.
		"""
		exec_stacks = []

		for entry in tar:
			tmpname = _test_elf_and_extract(tar, entry)
			if not tmpname:
				continue

			try:
				# The with-block closes the handle before the temporary file
				# is unlinked, including when ELF parsing raises.
				with open(tmpname, 'rb') as fp:
					elffile = ELFFile(fp)

					for segment in elffile.iter_segments():
						if segment['p_type'] != 'PT_GNU_STACK':
							continue

						# bit 0 of p_flags is the execute permission (PF_X)
						if segment['p_flags'] & 1:
							exec_stacks.append(entry.name)
			finally:
				os.unlink(tmpname)

		if exec_stacks:
			self.warnings = [("elffile-with-execstack %s", i)
					for i in exec_stacks]
Esempio n. 3
0
    def _test_elf2image(self, elfpath, binpath):
        """
        Convert `elfpath` with elf2image (esp8266, version 2) and check the
        resulting image at `binpath`.

        Checks, in order: the image has four segments; the .data, .text and
        .rodata sections are contained in it; the first segment has address 0
        and is as long as .irom0.text rounded up to a multiple of 16; the
        last four bytes of the file equal the CRC computed over everything
        before them; and assertImageInfo succeeds.  `binpath` is passed to
        try_delete afterwards, whether or not the checks passed.
        """
        try:
            self.run_elf2image("esp8266", elfpath, 2)
            image = esptool.LoadFirmwareImage("esp8266", binpath)
            self.assertEqual(4, len(image.segments))
            for section_name in (".data", ".text", ".rodata"):
                self.assertImageContainsSection(image, elfpath, section_name)

            irom_segment = image.segments[0]
            self.assertEqual(0, irom_segment.addr,
                             "IROM segment 'load address' should be zero")
            with open(elfpath, "rb") as elf_stream:
                irom_section = ELFFile(elf_stream).get_section_by_name(".irom0.text")
                # round the section size up to the next multiple of 16
                sh_size = (irom_section.header.sh_size + 15) & ~15
                self.assertEqual(len(irom_segment.data), sh_size, "irom segment (0x%x) should be same size (16 padded) as .irom0.text section (0x%x)" % (len(irom_segment.data), sh_size))

            # check V2 CRC (for ESP8266 SDK bootloader): the stored value is
            # the little-endian 32-bit word at the very end of the file
            with open(binpath, "rb") as image_stream:
                image_stream.seek(-4, os.SEEK_END)
                image_len = image_stream.tell()
                (crc_stored,) = struct.unpack("<I", image_stream.read(4))
                image_stream.seek(0)
                crc_calc = esptool.esp8266_crc32(image_stream.read(image_len))
                self.assertEqual(crc_stored, crc_calc)

            # imageinfo must be able to process the file as well
            self.assertImageInfo(binpath)

        finally:
            try_delete(binpath)
Esempio n. 4
0
def resolve_addr(fn, ip):
    """
    Look up address `ip` in the ELF file `fn`.

    Parsed ELF objects, line tables and symbol tables are cached per file
    name in the module-level `open_files`, `lines` and `symtables` mappings.

    Returns a tuple `(loc, offset, src)`:
      * `loc` / `offset` - element 2 of the entry `find_le` returns for the
        symbol table, and `ip` minus element 0 of that entry; both None when
        nothing was found.
      * `src` - "file:line"-style string built from elements 2 and 3 of the
        entry `find_le` returns for the line table, or None.
    """
    if fn not in open_files:
        # The stream is deliberately not closed here: the ELFFile built on it
        # is cached and used again by later calls.
        open_files[fn] = ELFFile(open(fn, 'rb'))
    elffile = open_files[fn]

    if fn not in lines and elffile.has_dwarf_info():
        lines[fn] = build_line_table(elffile.get_dwarf_info())

    if fn not in symtables:
        symtables[fn] = build_symtab(elffile)

    loc = offset = src = None

    nearest_symbol = find_le(symtables[fn], ip) if fn in symtables else None
    if nearest_symbol:
        loc = nearest_symbol[2]
        offset = ip - nearest_symbol[0]

    nearest_line = find_le(lines[fn], ip) if fn in lines else None
    if nearest_line:
        src = "%s:%d" % (nearest_line[2], nearest_line[3])

    return loc, offset, src
Esempio n. 5
0
def main():
    """
    Print the summed sizes of the requested ELF sections as a JSON object.

    Command line: one or more `--ram-section NAME`, one or more
    `--rom-section NAME`, and the path of the ELF file.  The output has the
    form {"program": <rom total>, "data": <ram total>}.  An unknown section
    name prints "Bad section name <name>" and exits with status 1.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--ram-section", action="append", required=True)
    cli.add_argument("--rom-section", action="append", required=True)
    cli.add_argument("elffile")
    args = cli.parse_args()

    with open(args.elffile, 'rb') as f:
        elffile = ELFFile(f)

        def total_size(section_names):
            # Sum sh_size over the named sections, aborting on the first
            # name the ELF file does not contain.
            total = 0
            for name in section_names:
                section = elffile.get_section_by_name(name)
                if section is None:
                    print("Bad section name " + name)
                    sys.exit(1)
                total += section["sh_size"]
            return total

        # RAM sections are processed before ROM sections
        ram_usage = total_size(args.ram_section)
        rom_usage = total_size(args.rom_section)

    print('{{\"program\": {}, \"data\": {}}}'.format(rom_usage, ram_usage))
Esempio n. 6
0
def process_file(filename):
    """
    Print a short summary of every DWARF compile unit in `filename`.

    For each compile unit the offset, the `unit_length` header field, the
    tag of its top DIE and that DIE's full path are printed.  If the file
    has no DWARF info a notice is printed instead.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for iterating the compile
        # units; CU header fields are read via item lookup.
        for cu in elf.get_dwarf_info().iter_CUs():
            print('  Found a compile unit at offset {0!s}, length {1!s}'.format(
                cu.cu_offset, cu['unit_length']))

            # The top DIE of a compile unit describes the unit itself.
            root_die = cu.get_top_DIE()
            print('    Top DIE with tag={0!s}'.format(root_die.tag))
            print('    name={0!s}'.format(root_die.get_full_path()))
Esempio n. 7
0
File: test.py Progetto: mhx/artx
 def __init__(self, elf):
     """
     Build symbol lookup tables from the ELF file at path `elf`.

     Only symbols with default visibility are considered.  'file' symbols
     set the current file name used to scope the local symbols that follow;
     'notype' symbols are ignored.  Local symbols are stored per file and
     type, global symbols per type, and every kept symbol is added to an
     address map of [value, size, label] entries sorted by value.
     """
     self.__glb = defaultdict(dict)
     self.__loc = defaultdict(lambda: defaultdict(dict))
     self.__map = []
     cur_file = ''
     with open(elf, 'rb') as stream:
         for section in ELFFile(stream).iter_sections():
             if not isinstance(section, SymbolTableSection):
                 continue
             for symbol in section.iter_symbols():
                 if symbol['st_other']['visibility'] != 'STV_DEFAULT':
                     continue
                 info = symbol['st_info']
                 # drop the first four characters of the type name
                 # (presumably the "STT_" prefix) and lower-case the rest
                 stype = info['type'][4:].lower()
                 sbind = info['bind']
                 if stype == 'file':
                     cur_file = symbol.name
                     continue
                 if stype == 'notype':
                     continue
                 value = symbol['st_value']
                 scope = ''
                 if sbind == 'STB_LOCAL':
                     scope = cur_file + ':'
                     self.__loc[cur_file][stype][symbol.name] = value
                 elif sbind == 'STB_GLOBAL':
                     self.__glb[stype][symbol.name] = value
                 label = '{0}{1}'.format(scope, symbol.name)
                 self.__map.append([value, symbol['st_size'], label])
     self.__map.sort(key=itemgetter(0))
     # addresses only, in the same order as the sorted map
     self.__idx = [entry[0] for entry in self.__map]
Esempio n. 8
0
    def run(self):
        """
        Build the DWARF model for `self.f` and hand it to the window.

        Steps the generator returned by `builder.build_step()` until it
        yields a truthy element.  After each falsy element a progress update
        (steps so far divided by the number of CUs) is scheduled on the GLib
        main loop.  Returns early without notifying the window when
        `self.stop_requested` is set; schedules `display_error` when the
        file has no DWARF info.
        """
        elf = ELFFile(self.f)

        if not elf.has_dwarf_info():
            GLib.idle_add(self.window.display_error, "This file has no DWARF info.")
            return

        builder = DwarfModelBuilder(elf.get_dwarf_info(), self.verbose)
        total = builder.num_cus()
        steps = builder.build_step()

        completed = 0
        while True:
            file_elem = next(steps)
            if file_elem:
                break
            if self.stop_requested:
                return
            GLib.idle_add(self.window.load_progress, float(completed) / total)
            completed += 1

        # a stop request may have arrived while the last step was running
        if self.stop_requested:
            return

        GLib.idle_add(self.window.done_loading, file_elem)
Esempio n. 9
0
    def __init__(self,filename):
        """
        Disassemble the .text section of the ELF file `filename`.

        Populates:
          * self.code / self.offset - raw bytes and sh_addr of .text
          * self.md     - the capstone disassembler (x86-64, detail on)
          * self.rodata - maps (.rodata address - .text address + index) to
                          the .rodata byte at that index; empty when the
                          file has no .rodata section
          * self.ins    - maps instruction address (relative to the start of
                          .text) to the capstone instruction
          * self.sym    - maps symbol name to st_value minus self.offset

        When the file has no .text section "No code found" is printed and
        only self.ins and self.sym (both empty) are set.
        """
        self.ins = {}
        self.sym = {}
        # Everything needed is read while the file is open; the with-block
        # closes the handle on every exit path.
        with open(filename, 'rb') as stream:
            elf = ELFFile(stream)
            rodata_section = elf.get_section_by_name(b'.rodata')
            text_section = elf.get_section_by_name(b'.text')

            if not text_section:
                print ("No code found")
                return

            self.code = text_section.data()
            self.offset = text_section['sh_addr']
            self.md = Cs(CS_ARCH_X86, CS_MODE_64)
            self.md.detail = True

            if rodata_section is not None:
                # Read the section once instead of once per byte.
                rodata_bytes = rodata_section.data()
                delta = rodata_section.header.sh_addr - text_section.header.sh_addr
                self.rodata = {k + delta: byte for k, byte in enumerate(rodata_bytes)}
            else:
                self.rodata = {}

            # Disassembly starts at address 0 rather than at sh_addr; the
            # original author noted problems when not disassembling from 0.
            for insn in self.md.disasm(self.code, 0):
                self.ins[insn.address] = insn

            symtab = elf.get_section_by_name(b'.symtab')
            if isinstance(symtab, SymbolTableSection):
                for sym in symtab.iter_symbols():
                    self.sym[sym.name] = sym['st_value'] - self.offset
Esempio n. 10
0
 def _get_impalad_dwarf_info(self):
   """
   Read DW_AT_name and DW_AT_producer of the first compile unit found in the
   ELF binary at self.impalad_path.

   Returns the 2-tuple (die_name, die_producer). An element is None when it
   could not be read: the binary has no DWARF info, or an exception occurred
   before the value was obtained. Exceptions are logged as a warning and not
   propagated.
   """
   # Background reading:
   # - be/CMakeLists.txt
   # - gcc(1), especially -grecord-gcc-switches, -g, -ggdb, -gdwarf-2
   # - readelf(1), e.g. "readelf --debug-dump=info --dwarf-depth=1 impalad"
   # - IMPALA-3501
   die_name = die_producer = None
   try:
     with open(self.impalad_path, 'rb') as fh:
       impalad_elf = ELFFile(fh)
       if impalad_elf.has_dwarf_info():
         # Only the first CU is needed, so the iterator is advanced once
         # instead of being looped over.
         first_cu = next(impalad_elf.get_dwarf_info().iter_CUs())
         attrs = first_cu.get_top_DIE().attributes
         die_name = attrs['DW_AT_name'].value
         die_producer = attrs['DW_AT_producer'].value
   except Exception as e:
     message = 'Failure to read DWARF info from {0}: {1}'.format(self.impalad_path,
                                                                 str(e))
     LOG.warn(message)
   return die_name, die_producer
Esempio n. 11
0
def load_binary(static):
  """
  Load the ELF file at `static.path` into `static`.

  Sets static['arch'] (via get_arch on the raw e_machine field) and
  static['entry'], adds every section with a non-zero address and size as a
  memory chunk, and names addresses after relocation symbols (prefixed with
  "__") and after function symbols.  Progress is printed when
  static.debug >= 1.
  """
  ncount = 0

  # One binary-mode handle serves both ELFFile and the raw header read, and
  # is closed when loading finishes or fails.
  with open(static.path, 'rb') as stream:
    elf = ELFFile(stream)

    # TODO: replace with elf['e_machine']
    stream.seek(0)
    progdat = stream.read(0x20)
    # NOTE(review): "H" decodes with the host byte order, not the ELF's own.
    fb = struct.unpack("H", progdat[0x12:0x14])[0]   # e_machine
    static['arch'] = get_arch(fb)
    static['entry'] = elf['e_entry']

    for section in elf.iter_sections():
      addr = section['sh_addr']
      slen = section['sh_size']
      if addr != 0 and slen > 0:
        static.add_memory_chunk(addr, section.data())

      if isinstance(section, RelocationSection):
        # sh_link of a relocation section names its symbol table
        symtable = elf.get_section(section['sh_link'])
        for rel in section.iter_relocations():
          symbol = symtable.get_symbol(rel['r_info_sym'])
          if static.debug >= 1: #suppress output for testing
            print("Relocation %s %s" % (rel, symbol.name))
          if rel['r_offset'] != 0 and symbol.name != "":
            static[rel['r_offset']]['name'] = "__"+symbol.name
            ncount += 1

      if isinstance(section, SymbolTableSection):
        for symbol in section.iter_symbols():
          if symbol['st_value'] != 0 and symbol.name != "" and symbol['st_info']['type'] == "STT_FUNC":
            if static.debug >= 1:
              print("Symbol %s %s" % (symbol['st_value'], symbol.name))
            static[symbol['st_value']]['name'] = symbol.name
            ncount += 1

  if static.debug >= 1:
    print("** found %d names" % ncount)
Esempio n. 12
0
 def _dump_elf(cls, buf):
     """
     Print the named symbols of every SHT_SYMTAB section of an ELF image.

     `buf` holds the bytes of the ELF file.  Symbols are listed sorted by
     name, with size, value, type and binding.
     Needs pyelftools (https://github.com/eliben/pyelftools)
     """
     from elftools.elf.elffile import ELFFile
     from elftools.elf import descriptions
     from io import BytesIO

     elf = ELFFile(BytesIO(buf))
     print("ELF file:")
     for sec in elf.iter_sections():
         if sec['sh_type'] != 'SHT_SYMTAB':
             continue
         # unnamed symbols are never printed, so they are dropped up front
         named = sorted((s for s in sec.iter_symbols() if s.name),
                        key=lambda s: s.name)
         print("    symbols:")
         for sym in named:
             line = "    - %r: size=%d, value=0x%x, type=%s, bind=%s" % (
                 sym.name.decode(),
                 sym['st_size'],
                 sym['st_value'],
                 descriptions.describe_symbol_type(sym['st_info']['type']),
                 descriptions.describe_symbol_bind(sym['st_info']['bind']),
             )
             print(line)
     print()
Esempio n. 13
0
    def test_verneed_section(self):
        """
        Compare the version-need entries of lib_versioned64.so.1.elf with
        TestSymbolVersioning.verneed_reference_data.

        The first GNUVerNeedSection of the file is used.  Entries and their
        auxiliary entries are compared pairwise with the reference data;
        zip() stops at the shorter of the two sequences.
        """
        reference_data = TestSymbolVersioning.verneed_reference_data

        with open(os.path.join('test', 'testfiles_for_unittests',
                               'lib_versioned64.so.1.elf'), 'rb') as f:
            elf = ELFFile(f)
            verneed_section = next(
                (section for section in elf.iter_sections()
                 if isinstance(section, GNUVerNeedSection)),
                None)

            self.assertIsNotNone(verneed_section)

            # Iterate the section that was just asserted on, rather than
            # whatever a search loop variable happens to be bound to.
            for (verneed, vernaux_iter), ref_verneed in zip(
                    verneed_section.iter_versions(), reference_data):

                self.assertEqual(verneed.name, ref_verneed['name'])
                self.assertEqual(verneed['vn_cnt'], ref_verneed['vn_cnt'])
                self.assertEqual(verneed['vn_version'],
                                 ref_verneed['vn_version'])

                for vernaux, ref_vernaux in zip(
                        vernaux_iter, ref_verneed['vernaux']):

                    self.assertEqual(vernaux.name, ref_vernaux['name'])
                    self.assertEqual(vernaux['vna_flags'],
                                     ref_vernaux['vna_flags'])
                    self.assertEqual(vernaux['vna_other'],
                                     ref_vernaux['vna_other'])
Esempio n. 14
0
class ReadElf(object):
    """Thin reader for a few symbols and the soname of an ELF stream."""

    def __init__(self, file):
        """Parse the ELF data available from the open stream `file`."""
        self.file = ELFFile(file)

    def get_symbol_tables(self):
        """
        Return {name: (value, size)} for the symbols KEY_XOR, C_CC_HOST and
        C_CC_URI found in any symbol table with a non-zero sh_entsize.

        For files whose elfclass is not 32 the value is reduced modulo
        65536.  A later symbol of the same name replaces an earlier one.
        """
        wanted = ('KEY_XOR', 'C_CC_HOST', 'C_CC_URI')
        found = {}
        for section in self.file.iter_sections():
            if not (isinstance(section, SymbolTableSection) and section['sh_entsize']):
                continue
            for symbol in section.iter_symbols():
                label = bytes2str(symbol.name)
                if label not in wanted:
                    continue
                value = symbol['st_value']
                if self.file.elfclass != 32:
                    value = value % 65536
                found[label] = (value, symbol['st_size'])
        return found

    def get_soname(self):
        """
        Return the DT_SONAME value of the dynamic section(s) as a string,
        or None when no such tag exists.  The last tag encountered wins.
        """
        so_name = None
        for section in self.file.iter_sections():
            if not isinstance(section, DynamicSection):
                continue
            for tag in section.iter_tags():
                if tag.entry.d_tag == 'DT_SONAME':
                    so_name = bytes2str(tag.soname)
        return so_name
Esempio n. 15
0
def process_file(stream, isNorm):
    """
    Collect the addresses of labelled symbols from an ELF stream.

    Every symbol of .symtab whose value is non-zero and whose name contains
    the module-level `divLabel` is stored as name -> st_value in the
    module-level dict `normAddresses` when `isNorm` is true, otherwise in
    `divAddresses`.  When the file has no .symtab an error is printed and
    the process exits.
    """
    # No `global` statements are needed: the module-level dicts are only
    # mutated through item assignment, never rebound.
    elffile = ELFFile(stream)
    section = elffile.get_section_by_name(b'.symtab')

    if not section:
        print('ERROR: No symbol table found. Perhaps this ELF has been stripped?')
        # NOTE(review): exit status 0 on an error path is kept as callers
        # may depend on it.
        sys.exit(0)

    if not isinstance(section, SymbolTableSection):
        return

    # Each symbol is parsed once per iteration instead of once per access.
    #TODO UNIQUE ADDRESSES...
    for symbol in section.iter_symbols():
        address = symbol.entry['st_value']
        if address == 0:
            continue
        if symbol.name.find(divLabel) == -1:
            continue #Only valid Labels
        if isNorm:
            normAddresses[symbol.name] = address
        else:
            divAddresses[symbol.name] = address
Esempio n. 16
0
 def test_core_prpsinfo(self):
     """
     Check the fields of every NT_PRPSINFO note in core_linux64.elf
     against the known values of the process that produced the core file.
     """
     # (field, expected value) pairs, checked in this order
     expected = (
         ('pr_state', 0),
         ('pr_sname', b'R'),
         ('pr_zomb', 0),
         ('pr_nice', 0),
         ('pr_flag', 0x400600),
         ('pr_uid', 1000),
         ('pr_gid', 1000),
         ('pr_pid', 23395),
         ('pr_ppid', 23187),
         ('pr_pgrp', 23395),
         ('pr_sid', 23187),
         ('pr_fname', b'coredump_self\x00\x00\x00'),
         ('pr_psargs', b'./coredump_self foo bar 42 ' + b'\x00' * (80 - 27)),
     )
     with open(os.path.join('test',
                            'testfiles_for_unittests', 'core_linux64.elf'),
               'rb') as f:
         elf = ELFFile(f)
         for segment in elf.iter_segments():
             if not isinstance(segment, NoteSegment):
                 continue
             for note in segment.iter_notes():
                 if note['n_type'] != 'NT_PRPSINFO':
                     continue
                 desc = note['n_desc']
                 for field, value in expected:
                     # assertEqual replaces the deprecated assertEquals
                     # alias; the field name is passed as failure message
                     self.assertEqual(desc[field], value, field)
Esempio n. 17
0
    def collect_needed(self, sofile):
        """
        Register `sofile` as a user of every library it needs.

        For each DT_NEEDED tag of the file's first SHT_DYNAMIC section,
        `sofile` is appended to `self.lib2required_by[<needed name>]`.
        Files that are not ELF are silently ignored; a PermissionError
        produces a warning.
        """
        try:
            with open(sofile, 'rb') as stream:
                elffile = ELFFile(stream)

                # Section headers are inspected through the lower-level API
                # so that a Section object is only constructed for the
                # dynamic section, not for every section of the file.
                for index in range(elffile.num_sections()):
                    header = elffile._get_section_header(index)
                    if header['sh_type'] != 'SHT_DYNAMIC':
                        continue
                    dynamic = DynamicSection(header,
                                             elffile._get_section_name(header),
                                             elffile.stream,
                                             elffile)

                    for tag in dynamic.iter_tags('DT_NEEDED'):
                        self.lib2required_by[tag.needed].append(sofile)
                    # there should only be one dynamic section
                    break
        except ELFError:
            # not an ELF file
            pass
        except PermissionError:
            warn("Could not open {}; please check permissions".format(sofile))
Esempio n. 18
0
def get_relocations(fd):
    """
    Return a dict with the relocations contained in a file

    `fd` is an open binary stream of an ELF file.  The result maps the
    r_offset of each relocation to the name of the symbol it refers to.
    Relocations whose symbol has no name (st_name == 0) are not included.
    A later relocation with the same offset replaces an earlier one.
    """
    elffile = ELFFile(fd)
    relocations = {}
    for section in elffile.iter_sections():
        if not isinstance(section, RelocationSection):
            continue

        # The symbol table section pointed to in sh_link
        symtable = elffile.get_section(section['sh_link'])

        for rel in section.iter_relocations():
            symbol = symtable.get_symbol(rel['r_info_sym'])
            # Unnamed symbols were never recorded by this function; they are
            # skipped outright instead of first looking up the section they
            # point at, a lookup whose result was discarded.
            # NOTE(review): if section-named relocations are wanted in the
            # result, that is a behaviour change to decide separately.
            if symbol['st_name'] == 0:
                continue
            relocations[rel['r_offset']] = bytes2str(symbol.name)

    return relocations
Esempio n. 19
0
def get_elf_info(filepath):
    """
    Parse the ELF file at `filepath` and return an ELFInfo.

    The header, every segment header and every section header are copied
    into plain dicts.  Calculated entries are added under keys starting
    with 'x_':
      * segments: 'x_perms' (list of 'read'/'write'/'execute'),
        'x_vaddr_mem_end' (p_vaddr + p_memsz) and
        'x_vaddr_file_end' (p_vaddr + p_filesz)
      * sections: 'x_name', 'x_addr_mem_end' and 'x_addr_file_end'
        (both sh_addr + sh_size)
    """
    local_path = pwndbg.file.get_file(filepath)
    with open(local_path, 'rb') as f:
        elffile = ELFFile(f)
        header = dict(elffile.header)

        perm_bits = ((PF_R, 'read'), (PF_W, 'write'), (PF_X, 'execute'))
        segments = []
        for seg in elffile.iter_segments():
            seg_info = dict(seg.header)
            seg_info['x_perms'] = [
                label for bit, label in perm_bits
                if seg_info['p_flags'] & bit != 0
            ]
            start = seg_info['p_vaddr']
            # where the in-memory image of the segment ends
            seg_info['x_vaddr_mem_end'] = start + seg_info['p_memsz']
            # where the part backed by file contents ends
            seg_info['x_vaddr_file_end'] = start + seg_info['p_filesz']
            segments.append(seg_info)

        sections = []
        for sec in elffile.iter_sections():
            sec_info = dict(sec.header)
            sec_info['x_name'] = sec.name
            end = sec_info['sh_addr'] + sec_info['sh_size']
            sec_info['x_addr_mem_end'] = end
            sec_info['x_addr_file_end'] = end
            sections.append(sec_info)

        return ELFInfo(header, sections, segments)
Esempio n. 20
0
    def load_file(self, filename=None):
        """
        Load the allocated SHT_PROGBITS sections of an ELF file.

        Each such section is stored in `self.sections` under its name
        without the first character, wrapped in a FormatELF_Section together
        with its load address.  The load address starts at the p_paddr of
        the first segment containing the section and advances by sh_size for
        every further section placed in the same segment.

        Raises FormatError when no filename is given, when the file cannot
        be opened or parsed, or when no section was loaded.
        """
        if filename is None:
            raise FormatError("Filename not specified.")

        stream = None
        try:
            stream = open(filename, 'rb')
            elffile = ELFFile(stream)
        except Exception:
            # Do not leak the handle when the file opened but did not parse.
            if stream is not None:
                stream.close()
            raise FormatError("Could not open ELF file \"%s\"." % filename)
        # The stream stays open on success: the section objects handed to
        # FormatELF_Section presumably read their data from it later on.

        # Segments are materialised once and tracked by index.  pyelftools
        # builds segment objects on demand, so objects obtained from separate
        # iter_segments() passes cannot be relied on as matching dict keys.
        segments = list(elffile.iter_segments())
        next_lma = dict()  # segment index -> load address of the next section

        for section in elffile.iter_sections():
            if section["sh_type"] != "SHT_PROGBITS":
                continue

            if not (section["sh_flags"] & SH_FLAGS.SHF_ALLOC):
                continue

            # NOTE(review): a section that lies in no segment previously
            # reused the address of the preceding section (or failed with an
            # unbound variable); falling back to sh_addr is an assumption.
            segment_offset = section["sh_addr"]
            for index, segment in enumerate(segments):
                if segment.section_in_segment(section):
                    segment_offset = next_lma.get(index, segment["p_paddr"])
                    next_lma[index] = segment_offset + section["sh_size"]
                    break

            section_name = section.name[1 : ]
            self.sections[section_name] = FormatELF_Section(section, segment_offset)

        if len(self.sections) == 0:
            raise FormatError("ELF file \"%s\" contains no data." % filename)
Esempio n. 21
0
	def runCode(self, fileName, parameters=None, stackSize=2048):
		"""
		Send the ELF program `fileName` to the device and return its response.

		The request carries the code signature taken from the '.ledger'
		section, the stack size, the entry point, the parameters and one code
		range per PT_LOAD segment.  While the device answers with a logAck
		its message is printed and execution is resumed.

		Raises Exception when the file has no code signature and
		ProxyException when the final response is not a startCodeResponseAck.
		"""
		message = LedgerWalletProxyRequest()
		# All ELF data is copied into the message inside the with-block, so
		# the file is closed before talking to the device.
		with open(fileName, 'rb') as f:
			elffile = ELFFile(f)
			for section in elffile.iter_sections():
				if section.name == '.ledger':
					blob = section.data()
					# The second byte plus 2 gives the signature length;
					# bytearray yields an int on Python 2 and 3 alike.
					sigLength = bytearray(blob[1:2])[0] + 2
					message.startCode.signature = blob[0:sigLength]
					break
			if len(message.startCode.signature) == 0:
				raise Exception("Missing code signature")
			message.startCode.stackSize = stackSize
			message.startCode.entryPoint = elffile.header['e_entry']
			message.startCode.parameters = parameters
			for segment in elffile.iter_segments():
				if segment['p_type'] == 'PT_LOAD':
					codeRange = message.startCode.code.add()
					flags = 0
					# flag 0x01 marks segments without write permission
					if ((segment['p_flags'] & P_FLAGS.PF_W) == 0):
						flags = flags | 0x01
					codeRange.flags = flags
					codeRange.start = segment['p_vaddr']
					codeRange.end = segment['p_vaddr'] + segment['p_memsz']
					codeRange.dataLength = segment['p_filesz']
					codeRange.data = segment.data()

		response = self.transport.exchange(message)
		while response.HasField('logAck'):
			print(response.logAck.message)
			message = LedgerWalletProxyRequest()
			message.resumeCode.CopyFrom(ResumeCode())
			response = self.transport.exchange(message)
		if response.HasField('startCodeResponseAck'):
			return response.startCodeResponseAck.response
		else:
			raise ProxyException("Unexpected response", response)
Esempio n. 22
0
def process_file(filename):
    """
    Print a short summary of every DWARF compile unit in `filename`.

    For each compile unit the offset, the `unit_length` header field, the
    tag of its top DIE and that DIE's DW_AT_name value are printed.  If the
    file has no DWARF info a notice is printed instead.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo object is the entry point for iterating the compile
        # units; CU header fields are read via item lookup.
        dwarf = elf.get_dwarf_info()
        for cu in dwarf.iter_CUs():
            offset_and_length = (cu.cu_offset, cu['unit_length'])
            print('  Found a compile unit at offset %s, length %s' % offset_and_length)

            # The top DIE of a compile unit describes the unit itself.
            root_die = cu.get_top_DIE()
            print('    Top DIE with tag=%s' % root_die.tag)

            # Attributes are looked up by name; the attribute object carries
            # the actual data in its `value` field.
            cu_name = root_die.attributes['DW_AT_name'].value
            print('    name=%s' % bytes2str(cu_name))
 def test_irom_bin(self):
     """
     The raw IROM binary (self.BIN_IROM) must be exactly as long as the
     .irom0.text section of the ELF file (self.ELF).
     """
     with open(self.ELF, "rb") as elf_stream:
         irom_section = ELFFile(elf_stream).get_section_by_name(".irom0.text")
         section_len = irom_section.header.sh_size
         bin_len = os.stat(self.BIN_IROM).st_size
         self.assertEqual(section_len, bin_len,
                          "IROM raw binary file should be same length as .irom0.text section")
Esempio n. 24
0
def load_symbols_elf(filename):
    """ Load the symbol tables contained in the file

    Returns a dict mapping the address (st_value) of every defined function
    symbol to the tuple (name, size, True).  Symbol tables with an
    sh_entsize of zero are skipped with a warning.  When several symbols
    share an address the last one encountered is kept.
    """
    symbols_by_addr = {}

    # The with-block closes the file even when parsing raises.
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        for section in elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            if section['sh_entsize'] == 0:
                logger.warn("Symbol table {} has a sh_entsize of zero.".format(section.name))
                continue

            logger.info("Symbol table {} contains {} entries.".format(section.name, section.num_symbols()))

            for symbol in section.iter_symbols():
                # keep only defined symbols of function type
                if describe_symbol_shndx(symbol['st_shndx']) == "UND":
                    continue
                if describe_symbol_type(symbol['st_info']['type']) != "FUNC":
                    continue
                symbols_by_addr[symbol['st_value']] = (symbol.name, symbol['st_size'], True)

    return symbols_by_addr
Esempio n. 25
0
File: data.py Progetto: Frky/scat
    def parse(self, binary, libclang_path, srcdir=None):
        """
        Collect function prototypes for `binary` and its dependencies.

        The DT_NEEDED entries of the binary become `self.deps`; the
        prototypes of each dependency are loaded through `Data` and merged
        into `self.protos`.  When `srcdir` is given, prototypes extracted
        from the sources with libclang (`libclang_path`) are stored in
        `self.protos_without_libs` and merged into `self.protos` as well.
        """
        print(" * Checking dependencies")
        self.deps = []
        self.protos = dict()

        with open(binary, "rb") as f:
            elf_file = ELFFile(f)

            dynamic = elf_file.get_section_by_name(".dynamic")
            # A binary without a .dynamic section has no dependencies to
            # report; get_section_by_name yields None for it.
            if dynamic is not None:
                for tag in dynamic.iter_tags("DT_NEEDED"):
                    print("     Found dependency {}".format(tag.needed))
                    self.deps.append(tag.needed)

        for dep in self.deps:
            dep_data = Data(self.dstdir, dep)
            dep_data.load(False)
            self.protos.update(dep_data.protos)

        self.protos_without_libs = dict()

        if srcdir is not None:
            print(" * Extracting data from source code")
            extractor = ClangExtractor(libclang_path, srcdir)
            self.protos_without_libs.update(extractor.extract())

        print(" * Extracting data from binary debug informations")
        # NOTE(review): the extractor is created but its result is currently
        # not merged; the update call below is disabled in the original.
        extractor = DwarfExtractor()
        # self.protos_without_libs.update(extractor.extract(binary))

        self.protos.update(self.protos_without_libs)
Esempio n. 26
0
def section_info_highlevel(stream):
    """
    Print section information for the ELF `stream` using only the public
    ELFFile methods: the number of sections, the name and type of .symtab
    and, when it is a symbol table section, its symbol count and the name
    of its last symbol.
    """
    print('High level API...')
    elf = ELFFile(stream)

    print('  %s sections' % elf.num_sections())

    # The section name is passed as a bytes object here.
    symtab = elf.get_section_by_name(b'.symtab')
    if not symtab:
        print('  No symbol table found. Perhaps this ELF has been stripped?')
        return

    # The type is read from the section header, the name from a public
    # attribute; bytes2str keeps the output identical on Python 2 and 3.
    print('  Section name: %s, type: %s' %(
        bytes2str(symtab.name), symtab['sh_type']))

    # More details are available only for real symbol table sections.
    if not isinstance(symtab, SymbolTableSection):
        return

    count = symtab.num_symbols()
    print("  It's a symbol section with %s symbols" % count)
    last_symbol = symtab.get_symbol(count - 1)
    print("  The name of the last symbol in the section is: %s" % (
        bytes2str(last_symbol.name)))
Esempio n. 27
0
 def test_core_prpsinfo(self):
     """
     Check the fields of every NT_PRPSINFO note of the core file in
     self._core_file against the known values of the dumped process.
     """
     elf = ELFFile(self._core_file)
     for segment in elf.iter_segments():
         if not isinstance(segment, NoteSegment):
             continue
         # The notes are iterated once; a second, discarded parse of the
         # same segment was removed.
         for note in segment.iter_notes():
             if note['n_type'] != 'NT_PRPSINFO':
                 continue
             desc = note['n_desc']
             self.assertEqual(desc['pr_state'], 0)
             self.assertEqual(desc['pr_sname'], b'R')
             self.assertEqual(desc['pr_zomb'], 0)
             self.assertEqual(desc['pr_nice'], 0)
             self.assertEqual(desc['pr_flag'], 0x400600)
             self.assertEqual(desc['pr_uid'], 1000)
             self.assertEqual(desc['pr_gid'], 1000)
             self.assertEqual(desc['pr_pid'], 23395)
             self.assertEqual(desc['pr_ppid'], 23187)
             self.assertEqual(desc['pr_pgrp'], 23395)
             self.assertEqual(desc['pr_sid'], 23187)
             self.assertEqual(
                 desc['pr_fname'],
                 b'coredump_self\x00\x00\x00')
             # the argument string is padded with NUL bytes to 80 bytes
             self.assertEqual(
                 desc['pr_psargs'],
                 b'./coredump_self foo bar 42 ' + b'\x00' * (80 - 27))
Esempio n. 28
0
def process_file(filename):
    """Disassemble the ``.text`` section of an ELF file and emulate it.

    Every decoded instruction is dispatched to the ``Emulator`` method named
    after its mnemonic; mnemonics without a matching method are reported on
    stdout.

    :param filename: path of the ELF file to process
    :return: the instruction list produced by ``distorm3.Decompose``
    """
    print("Processing file: ", filename)
    with open(filename, "rb") as stream:

        # The stream has to stay open while the ELF data is being read
        elf_file = ELFFile(stream)

        # The machine code lives in the .text section
        code_section = elf_file.get_section_by_name(".text")
        load_address = code_section.header["sh_addr"]
        disassembly = distorm3.Decompose(load_address, code_section.data())

        # Symbol table lookup; the section is fetched but not used yet
        symbol_table_section = elf_file.get_section_by_name(".symtab")
        symbol_table = {}  # TODO: Fill in the symbol table...

        # Create an LLVM emulator and feed it one instruction at a time
        emulator = Emulator("module", symbol_table)
        not_found = object()
        for instruction in disassembly:
            handler = getattr(emulator, instruction.mnemonic, not_found)
            if handler is not_found:
                print(instruction.mnemonic + " not implemented yet. Please implement it!")
                continue
            handler(instruction)

    return disassembly
Esempio n. 29
0
def main():
    """Extract the ``.text`` section of an ELF file and save it as shellcode.

    Command line: a mandatory input ELF file plus any combination of
    ``-j/--javascript``, ``-r/--raw`` and ``-c/--cstub`` output files. The
    raw bytes of ``.text`` are handed to the matching ``write_*`` helper for
    each requested output.

    Exits through ``parser.error`` (status 2) when the input has no ``.text``
    section. Returns ``None`` after printing a note on stderr when no output
    was requested.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="The input ELF file",
                        type=argparse.FileType("rb"))

    parser.add_argument("-j", "--javascript",
                        help="Save shellcode to javascript file",
                        type=argparse.FileType("w+"))
    parser.add_argument("-r", "--raw",
                        help="Save shellcode to raw binary file",
                        type=argparse.FileType("wb+"))
    parser.add_argument("-c", "--cstub",
                        help="Save shellcode to a C stub test file",
                        type=argparse.FileType("wb+"))

    args = parser.parse_args()

    elffile = ELFFile(args.input)
    text = elffile.get_section_by_name(".text")
    if text is None:
        # get_section_by_name() returns None for a missing section; report
        # that instead of failing with an AttributeError on None.data().
        parser.error("input ELF file has no .text section")
    data = text.data()

    if args.javascript is None and args.raw is None and args.cstub is None:
        sys.stderr.write("No output file(s) specified, nothing to do.\n")
        return

    if args.javascript is not None:
        write_js(args.javascript, data)

    if args.raw is not None:
        write_raw(args.raw, data)

    if args.cstub is not None:
        write_c(args.cstub, data)
Esempio n. 30
0
def process_file(filename):
    """Print a summary of every DWARF compile unit found in an ELF file.

    For each compile unit the offset, length, top DIE tag and full path are
    printed, followed by a recursive dump of the DIE tree via
    ``die_info_rec``. Files without DWARF info only get a short notice.

    :param filename: path of the ELF file to inspect
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # The DWARFInfo context object is the entry point for all
        # DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # Each compile unit of .debug_info is a CompileUnit object whose
        # header fields are read by item lookup.
        for unit in dwarfinfo.iter_CUs():
            unit_offset = unit.cu_offset
            unit_length = unit['unit_length']
            print('  Found a compile unit at offset %s, length %s' % (
                unit_offset, unit_length))

            # The top DIE is the root of this unit's DIE tree
            root_die = unit.get_top_DIE()
            print('    Top DIE with tag=%s' % root_die.tag)

            # The file name of the unit
            print('    name=%s' % root_die.get_full_path())

            # Dump the whole tree starting from the root
            die_info_rec(root_die)
Esempio n. 31
0
def lddtree(path: str,
            root: str = '/',
            prefix: str = '',
            ldpaths: Optional[Dict[str, List[str]]] = None,
            display: Optional[str] = None,
            lib_cache: Optional[Dict] = None,
            _first: bool = True,
            _all_libs: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Parse the ELF dependency tree of the specified file

    Parameters
    ----------
    path
        The ELF to scan
    root
        The root tree to prepend to paths; this applies to interp and rpaths
        only as ``path`` and ``ldpaths`` are expected to be prefixed already
    prefix
        The path under ``root`` to search
    ldpaths
        dict containing library paths to search; should have the keys:
        conf, env, interp. If not supplied, the function ``load_ld_paths``
        will be called. The dict is copied before use, so the caller's
        object is never modified.
    display
        The path to show rather than ``path``
    lib_cache
        Can re-use result["libs"] dictionary from previous runs to skip searching for
        common libraries across a set of ELF binaries. Note that cache is read-only,
        it is not updated with newly found libraries. Defaults to no cache.
        WARNING: using cache might not be safe if top level elfs use different `rpath/runpath`
        Example:
           elf1[rpath=/a] -> /a/libsomething.so
           elf2[rpath=/b] -> /b/libsomething.so
        Re-using cache from elf1 to compute lddtree for elf2 will result in incorrect mapping
        for libsomething.
    _first
        Recursive use only; is this the first ELF?
    _all_libs
        Recursive use only; dict of all libs we've seen

    Returns
    -------
    a dict containing information about all the ELFs; e.g.
    {
      'interp': '/lib64/ld-linux.so.2',
      'needed': ['libc.so.6', 'libcurl.so.4',],
      'libs': {
        'libc.so.6': {
          'path': '/lib64/libc.so.6',
          'needed': [],
        },
        'libcurl.so.4': {
          'path': '/usr/lib64/libcurl.so.4',
          'needed': ['libc.so.6', 'librt.so.1',],
        },
      },
    }
    """
    # None sentinels replace the former mutable ``{}`` defaults, which are
    # shared between calls.
    if lib_cache is None:
        lib_cache = {}

    if _first:
        _all_libs = {}
        # Honour a caller-supplied ``ldpaths`` as documented. Work on a copy
        # because the 'interp', 'rpath' and 'runpath' keys are set below.
        ldpaths = (load_ld_paths() if ldpaths is None else ldpaths).copy()
    else:
        assert ldpaths is not None
        if _all_libs is None:
            _all_libs = {}

    ret = {
        'interp': None,
        'path': path if display is None else display,
        'realpath': path,
        'needed': [],
        'rpath': [],
        'runpath': [],
        'libs': _all_libs,
    }  # type: Dict[str, Any]

    log.debug('lddtree(%s)', path)

    with open(path, 'rb') as f:
        elf = ELFFile(f)

        # If this is the first ELF, extract the interpreter.
        if _first:
            for segment in elf.iter_segments():
                if segment.header.p_type != 'PT_INTERP':
                    continue

                interp = segment.get_interp_name()
                log.debug('  interp           = %s', interp)
                ret['interp'] = normpath(root + interp)
                ret['libs'][os.path.basename(interp)] = {
                    'path': ret['interp'],
                    'realpath': readlink(ret['interp'], root, prefixed=True),
                    'needed': [],
                }

                # Drop ``prefix`` from the front of the interpreter directory.
                # str.lstrip() must not be used for this: it strips any
                # leading run of the *characters* in its argument, so a prefix
                # such as '/usr' would also eat the '/' of '/lib'.
                interp_dir = os.path.dirname(interp)
                if prefix and interp_dir.startswith(prefix):
                    unprefixed_dir = interp_dir[len(prefix):]
                else:
                    unprefixed_dir = interp_dir

                # XXX: Should read it and scan for /lib paths.
                ldpaths['interp'] = [
                    normpath(root + interp_dir),
                    normpath(root + prefix + '/usr' + unprefixed_dir),
                ]
                log.debug('  ldpaths[interp]  = %s', ldpaths['interp'])
                break

        # Parse the ELF's dynamic tags.
        libs = []  # type: List[str]
        rpaths = []  # type: List[str]
        runpaths = []  # type: List[str]
        for segment in elf.iter_segments():
            if segment.header.p_type != 'PT_DYNAMIC':
                continue

            for t in segment.iter_tags():
                if t.entry.d_tag == 'DT_RPATH':
                    rpaths = parse_ld_paths(t.rpath, root=root, path=path)
                elif t.entry.d_tag == 'DT_RUNPATH':
                    runpaths = parse_ld_paths(t.runpath, root=root, path=path)
                elif t.entry.d_tag == 'DT_NEEDED':
                    libs.append(t.needed)
            if runpaths:
                # If both RPATH and RUNPATH are set, only the latter is used.
                rpaths = []

            # XXX: We assume there is only one PT_DYNAMIC.  This is
            # probably fine since the runtime ldso does the same.
            break
        if _first:
            # Propagate the rpaths used by the main ELF since those will be
            # used at runtime to locate things.
            ldpaths['rpath'] = rpaths
            ldpaths['runpath'] = runpaths
            log.debug('  ldpaths[rpath]   = %s', rpaths)
            log.debug('  ldpaths[runpath] = %s', runpaths)
        ret['rpath'] = rpaths
        ret['runpath'] = runpaths
        ret['needed'] = libs

        # Search for the libs this ELF uses.
        all_ldpaths = None  # type: Optional[List[str]]
        for lib in libs:
            if lib in _all_libs:
                continue
            cached = lib_cache.get(lib, None)
            if cached is not None:
                _all_libs[lib] = cached
                continue

            # Built lazily: only needed once a lib has to be searched for.
            if all_ldpaths is None:
                all_ldpaths = (ldpaths['rpath'] + rpaths + runpaths +
                               ldpaths['env'] + ldpaths['runpath'] +
                               ldpaths['conf'] + ldpaths['interp'])
            realpath, fullpath = find_lib(elf, lib, all_ldpaths, root)
            # Registered before recursing, so a dependency cycle hits the
            # ``lib in _all_libs`` check above instead of recursing forever.
            _all_libs[lib] = {
                'realpath': realpath,
                'path': fullpath,
                'needed': [],
            }
            if fullpath:
                lret = lddtree(realpath,
                               root,
                               prefix,
                               ldpaths,
                               display=fullpath,
                               lib_cache=lib_cache,
                               _first=False,
                               _all_libs=_all_libs)
                _all_libs[lib]['needed'] = lret['needed']

    return ret
Esempio n. 32
0
from elftools.elf.sections import SymbolTableSection
from panda import Panda, blocking, ffi

# Guest architecture: first command-line argument, defaulting to x86_64.
arch = "x86_64" if len(argv) <= 1 else argv[1]
# Extra emulator arguments passed to Panda as ``extra_args``; note the
# hard-coded absolute path of the cidata ISO.
extra = "-nographic -chardev socket,id=monitor,path=./monitor.sock,server,nowait -monitor chardev:monitor -serial telnet:127.0.0.1:4445,server,nowait  -device e1000,netdev=net0 -netdev user,id=net0,hostfwd=tcp::5556-:22 -cdrom /home/luke/workspace/qcows/instance-1-cidata.iso"
# Disk image handed to Panda (hard-coded absolute path).
qcow = "/home/luke/workspace/qcows/instance-1.qcow2"
panda = Panda(arch=arch, qcow=qcow, extra_args=extra, mem="1G")

out = []
# Maps a symbol's st_value to its name; filled from the ELF below.
mappings = {}
# Directory and file name of the binary whose symbols are loaded.
bin_dir = "taint"
bin_name = "taint"

# Read symbols from bin into mappings
with open(path.join(bin_dir, bin_name), 'rb') as f:
    our_elf = ELFFile(f)
    for section in our_elf.iter_sections():
        # Only symbol table sections carry symbols.
        if not isinstance(section, SymbolTableSection): continue
        for symbol in section.iter_symbols():
            if len(symbol.name):  # Sometimes empty
                # A later symbol with the same st_value replaces an earlier one.
                mappings[symbol['st_value']] = symbol.name

tainted = False
g_phys_addrs = []


@panda.cb_before_block_exec()
def bbe(cpu, tb):
    if tb.pc in mappings:
        print('\nRunning function: {}'.format(mappings[tb.pc]))
        if mappings[tb.pc] == "query_taint":
    R_AMD64_DTPMOD64=0x00000010,
    R_AMD64_DTPOFF64=0x00000011,
    R_AMD64_TPOFF64=0x00000012,
    R_AMD64_TLSGD=0x00000013,
    R_AMD64_TLSLD=0x00000014,
    R_AMD64_DTPOFF32=0x00000015,
    R_AMD64_GOTTPOFF=0x00000016,
    R_AMD64_TPOFF32=0x00000017,
    R_AMD64_PC64=0x00000018,
    R_AMD64_GOTOFF64=0x00000019,
    R_AMD64_GOTPC32=0x0000001A,
)

# Locate the program headers of the dynamic and SCE dynlib-data segments of
# the input file; abort when the dynamic segment is missing.
with open(args.infile, "rb") as f:
    # Leverage PyElfTools for handling some of the parsing
    elf = ELFFile(f)

    dynamicPH = None
    dynlibDataPH = None

    # Scan all segments; if a type occurs more than once the last one wins.
    for segment in elf.iter_segments():
        if segment.header.p_type == 'PT_DYNAMIC':
            dynamicPH = segment.header

        if segment.header.p_type == PT_SCE_DYNLIBDATA:
            dynlibDataPH = segment.header

    # Identity test: ``== None`` would defer to the header object's own
    # equality implementation instead of checking for the sentinel.
    if dynamicPH is None:
        print("An error occurred, as the ELF is not a valid OELF!")
        sys.exit(1)
Esempio n. 34
0
def main():
    """Main program

    Reads the kernel ELF named by ``args.kernel``, selects the page table
    class matching the build configuration, reserves and maps the address
    ranges described by the kernel's symbols, optionally adjusts region
    permissions, and writes the resulting tables to ``args.output``.

    Calls ``error()`` when the image does not fit in the virtual address
    space, or when the written size differs from the size of the
    ``dummy_pagetables`` symbol reserved in the kernel.
    """
    global syms
    parse_args()

    with open(args.kernel, "rb") as elf_fp:
        kernel = ELFFile(elf_fp)
        syms = get_symbols(kernel)

        # Size reserved for the page tables in the kernel image, if the
        # placeholder symbol exists; compared with the written size at the end.
        sym_dummy_pagetables = find_symbol(kernel, "dummy_pagetables")
        if sym_dummy_pagetables:
            reserved_pt_size = sym_dummy_pagetables['st_size']
        else:
            reserved_pt_size = None

    # Pick the page table implementation for the configured paging mode
    if isdef("CONFIG_X86_64"):
        pclass = PtablesIA32e
    elif isdef("CONFIG_X86_PAE"):
        pclass = PtablesPAE
    else:
        pclass = Ptables32bit

    debug("building %s" % pclass.__name__)

    vm_base = syms["CONFIG_KERNEL_VM_BASE"]
    vm_size = syms["CONFIG_KERNEL_VM_SIZE"]
    vm_offset = syms["CONFIG_KERNEL_VM_OFFSET"]

    sram_base = syms["CONFIG_SRAM_BASE_ADDRESS"]
    # The symbol value is scaled by 1024 here (presumably KiB to bytes)
    sram_size = syms["CONFIG_SRAM_SIZE"] * 1024

    mapped_kernel_base = syms["z_mapped_start"]
    mapped_kernel_size = syms["z_mapped_size"]

    if isdef("CONFIG_SRAM_OFFSET"):
        sram_offset = syms["CONFIG_SRAM_OFFSET"]
    else:
        sram_offset = 0

    # Figure out if there is any need to do virtual-to-physical
    # address translation
    virt_to_phys_offset = (sram_base + sram_offset) - (vm_base + vm_offset)

    # The image to map is either all of SRAM or just the mapped kernel range
    if isdef("CONFIG_ARCH_MAPS_ALL_RAM"):
        image_base = sram_base
        image_size = sram_size
    else:
        image_base = mapped_kernel_base
        image_size = mapped_kernel_size

    image_base_phys = image_base + virt_to_phys_offset

    ptables_phys = syms["z_x86_pagetables_start"] + virt_to_phys_offset

    debug("Address space: 0x%x - 0x%x size 0x%x" %
          (vm_base, vm_base + vm_size - 1, vm_size))

    debug("Zephyr image: 0x%x - 0x%x size 0x%x" %
          (image_base, image_base + image_size - 1, image_size))

    if virt_to_phys_offset != 0:
        debug("Physical address space: 0x%x - 0x%x size 0x%x" %
              (sram_base, sram_base + sram_size - 1, sram_size))

    is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")

    # Are pages in non-boot, non-pinned sections present at boot.
    is_generic_section_present = isdef(
        "CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT")

    if image_size >= vm_size:
        error("VM size is too small (have 0x%x need more than 0x%x)" %
              (vm_size, image_size))

    map_flags = 0

    if is_perm_regions:
        # Don't allow execution by default for any pages. We'll adjust this
        # in later calls to pt.set_region_perms()
        map_flags = ENTRY_XD

    pt = pclass(ptables_phys)
    # Instantiate all the paging structures for the address space
    pt.reserve(vm_base, vm_size)
    # Map the zephyr image
    if is_generic_section_present:
        # From here on map_flags carries the present bit for all later maps
        map_flags = map_flags | FLAG_P
        pt.map(image_base_phys, image_base, image_size, map_flags | ENTRY_RW)
    else:
        # When generic linker sections are not present in physical memory,
        # the corresponding virtual pages should not be mapped to non-existent
        # physical pages. So simply identity map them to create the page table
        # entries but without the present bit set.
        # Boot and pinned sections (if configured) will be mapped to
        # physical memory below.
        pt.map(image_base, image_base, image_size, map_flags | ENTRY_RW)

    if virt_to_phys_offset != 0:
        # Need to identity map the physical address space
        # as it is needed during early boot process.
        # This will be unmapped once z_x86_mmu_init()
        # is called.
        # Note that this only does the identity mapping
        # at the page directory level to minimize wasted space.
        pt.reserve_unaligned(image_base_phys, image_size, to_level=PD_LEVEL)
        pt.identity_map_unaligned(image_base_phys,
                                  image_size,
                                  FLAG_P | FLAG_RW | FLAG_SZ,
                                  level=PD_LEVEL)

    if isdef("CONFIG_X86_64"):
        # 64-bit has a special region in the first 64K to bootstrap other CPUs
        # from real mode
        locore_base = syms["_locore_start"]
        locore_size = syms["_lodata_end"] - locore_base
        debug("Base addresses: physical 0x%x size 0x%x" %
              (locore_base, locore_size))
        pt.map(locore_base, None, locore_size, map_flags | FLAG_P | ENTRY_RW)

    if isdef("CONFIG_XIP"):
        # Additionally identity-map all ROM as read-only
        pt.map(syms["CONFIG_FLASH_BASE_ADDRESS"], None,
               syms["CONFIG_FLASH_SIZE"] * 1024, map_flags | FLAG_P)

    if isdef("CONFIG_LINKER_USE_BOOT_SECTION"):
        pt.map_region("lnkr_boot", map_flags | FLAG_P | ENTRY_RW,
                      virt_to_phys_offset)

    if isdef("CONFIG_LINKER_USE_PINNED_SECTION"):
        pt.map_region("lnkr_pinned", map_flags | FLAG_P | ENTRY_RW,
                      virt_to_phys_offset)

    # Process extra mapping requests
    if args.map:
        map_extra_regions(pt)

    # Adjust mapped region permissions if configured
    if is_perm_regions:
        # Need to accomplish the following things:
        # - Text regions need the XD flag cleared and RW flag removed
        #   if not built with gdbstub support
        # - Rodata regions need the RW flag cleared
        # - User mode needs access as we currently do not separate application
        #   text/rodata from kernel text/rodata
        if isdef("CONFIG_GDBSTUB"):
            flags = ENTRY_US | ENTRY_RW
        else:
            flags = ENTRY_US

        if is_generic_section_present:
            flags = flags | FLAG_P

        pt.set_region_perms("__text_region", flags)

        # Boot and pinned text always get the present bit
        if isdef("CONFIG_LINKER_USE_BOOT_SECTION"):
            pt.set_region_perms("lnkr_boot_text", flags | FLAG_P)

        if isdef("CONFIG_LINKER_USE_PINNED_SECTION"):
            pt.set_region_perms("lnkr_pinned_text", flags | FLAG_P)

        # Rodata: user accessible, never executable
        flags = ENTRY_US | ENTRY_XD
        if is_generic_section_present:
            flags = flags | FLAG_P

        pt.set_region_perms("__rodata_region", flags)

        if isdef("CONFIG_LINKER_USE_BOOT_SECTION"):
            pt.set_region_perms("lnkr_boot_rodata", flags | FLAG_P)

        if isdef("CONFIG_LINKER_USE_PINNED_SECTION"):
            pt.set_region_perms("lnkr_pinned_rodata", flags | FLAG_P)

        if isdef("CONFIG_COVERAGE_GCOV") and isdef("CONFIG_USERSPACE"):
            # If GCOV is enabled, user mode must be able to write to its
            # common data area
            pt.set_region_perms("__gcov_bss",
                                FLAG_P | ENTRY_RW | ENTRY_US | ENTRY_XD)

        if isdef("CONFIG_X86_64"):
            # Set appropriate permissions for locore areas much like we did
            # with the main text/rodata regions

            if isdef("CONFIG_X86_KPTI"):
                # Set the User bit for the read-only locore/lorodata areas.
                # This ensures they get mapped into the User page tables if
                # KPTI is turned on. There is no sensitive data in them, and
                # they contain text/data needed to take an exception or
                # interrupt.
                flag_user = ENTRY_US
            else:
                flag_user = 0

            pt.set_region_perms("_locore", FLAG_P | flag_user)
            pt.set_region_perms("_lorodata", FLAG_P | ENTRY_XD | flag_user)

    written_size = pt.write_output(args.output)
    debug("Written %d bytes to %s" % (written_size, args.output))

    # Warn if reserved page table is not of correct size
    if reserved_pt_size and written_size != reserved_pt_size:
        # Figure out how many extra pages needed
        size_diff = written_size - reserved_pt_size
        page_size = syms["CONFIG_MMU_PAGE_SIZE"]
        extra_pages_needed = int(round_up(size_diff, page_size) / page_size)

        if isdef("CONFIG_X86_EXTRA_PAGE_TABLE_PAGES"):
            extra_pages_kconfig = syms["CONFIG_X86_EXTRA_PAGE_TABLE_PAGES"]
            # Reinterpret the symbol value as a signed integer of the target
            # word size before adding it to the page count
            if isdef("CONFIG_X86_64"):
                extra_pages_needed += ctypes.c_int64(extra_pages_kconfig).value
            else:
                extra_pages_needed += ctypes.c_int32(extra_pages_kconfig).value

        reason = "big" if reserved_pt_size > written_size else "small"

        error(("Reserved space for page table is too %s."
               " Set CONFIG_X86_EXTRA_PAGE_TABLE_PAGES=%d") %
              (reason, extra_pages_needed))
Esempio n. 35
0
 def __init__(self, filename):
     """Parse the ELF file at ``filename`` into ``self.elffile``.

     ELFFile keeps a reference to its stream and reads from it on demand,
     so the stream must outlive this constructor. The file content is
     therefore copied into memory: the on-disk file is closed right away
     while ``self.elffile`` stays usable.

     :param filename: path of the ELF file to load
     """
     import io

     self.filename = filename
     with open(self.filename, 'rb') as f:
         self.elffile = ELFFile(io.BytesIO(f.read()))
Esempio n. 36
0
 def __init__(self, path, mode):
     """Open ``path`` with ``mode`` and parse it as an ELF file.

     The file object is kept in ``self.f`` because ``self.elf`` reads from
     it; it stays open for the lifetime of the instance.

     :param path: path of the ELF file
     :param mode: mode string passed to ``open``
     """
     self.f = open(path, mode)
     try:
         self.elf = ELFFile(self.f)
     except Exception:
         # Do not leak the handle when the file cannot be parsed.
         self.f.close()
         raise
Esempio n. 37
0
class ElfParser(object):
    """Build symbol-table and DWARF lookup structures from an ELF file.

    Both ``parse_*`` methods cache their result on the instance. The cache
    is only filled after a successful parse, so a failed attempt is retried
    on the next call instead of returning an empty or partial result.
    """

    # Pre-compiled patterns, keyed by attribute name, applied to the text
    # returned by describe_attr_value(). Raw strings keep ``\w`` and ``\d``
    # from being treated as (invalid) string escapes.
    _ATTRIBUTE_PATTERNS = {
        "DW_AT_name": re.compile(r"^([\w ]+\t)|: ([\w ]+\t)$"),
        "DW_AT_type": re.compile(r"^<(0x[\da-fA-F]+)>\t$"),
        "DW_AT_location": re.compile(r".*DW_OP_addr: ([\w]+)"),
        "DW_AT_data_member_location": re.compile(r"^([\d]+\t)$"),
        "DW_AT_byte_size": re.compile(r"^([\d]+\t)$"),
    }
    # Attributes whose captured text is parsed as a base-16 integer.
    _HEX_ATTRIBUTES = ("DW_AT_type", "DW_AT_location")
    # Attributes whose captured text is parsed as a base-10 integer.
    _DECIMAL_ATTRIBUTES = ("DW_AT_data_member_location", "DW_AT_byte_size")

    def __init__(self, elf_file):
        """
        :param elf_file: file object handed to ``ELFFile``
        """
        self._elf = ELFFile(elf_file)
        self.symbol_table = None
        self.dwarf_info = None

    # Public methods

    def parse_symbol_table(self):
        """ build symbol table data structure

        Only symbols of type OBJECT with a non-zero size are recorded.

        :return: the (cached) SymbolTable
        """
        if self.symbol_table is None:
            table = SymbolTable()

            for section in self._elf.iter_sections():
                if not isinstance(section, SymbolTableSection):
                    continue
                for symbol in section.iter_symbols():
                    if ((int(symbol["st_size"]) > 0)
                            and ("OBJECT" == describe_symbol_type(
                                symbol["st_info"]["type"]))):
                        table.add_symbol(
                            Symbol(symbol.name, symbol["st_value"],
                                   symbol["st_size"]))

            # Publish only a completely built table.
            self.symbol_table = table

        return self.symbol_table

    def parse_dwarf_info(self):
        """ build dwarf info data structure

        Every non-null DIE that has at least one attribute of interest is
        stored under its offset, with its nesting depth, abbreviation code,
        tag and the list of decoded attributes.

        :return: OrderedDict mapping DIE offset to its description
        :raises ValueError: when the ELF file carries no debug information
        """
        if self.dwarf_info is None:
            logging.debug('Parsing DWARF Info...')
            dwarf_info = self._elf.get_dwarf_info()
            if not dwarf_info.has_debug_info:
                raise ValueError(
                    "Debug information not available in ELF file. "
                    "Symbol table will be empty")

            parsed = OrderedDict()
            for cu in dwarf_info.iter_CUs():
                die_depth = 0
                for die in cu.iter_DIEs():

                    # A null DIE closes the current list of siblings.
                    if die.is_null():
                        die_depth -= 1
                        continue

                    # abbreviation property of interest
                    abbreviation = OrderedDict()
                    abbreviation["depth"] = die_depth
                    abbreviation["offset"] = die.offset
                    abbreviation["code"] = die.abbrev_code
                    abbreviation["tag"] = die.tag
                    abbreviation["attr"] = []

                    logging.debug(" <%s><%s>: Abbrev Number: %s (%s)",
                                  die_depth, hex(die.offset),
                                  die.abbrev_code, die.tag)

                    for attr in itervalues(die.attributes):
                        description = self._get_attribute_description(
                            attr, die)
                        if description is None:
                            continue

                        attr_dict = OrderedDict()
                        attr_dict["offset"] = attr.offset
                        attr_dict["name"] = attr.name
                        attr_dict["desc"] = description
                        abbreviation["attr"].append(attr_dict)

                        log_description = hex(description) if isinstance(
                            description, int) else description
                        logging.debug("    <%s>   %s: %s",
                                      hex(attr.offset), attr.name,
                                      log_description)

                    if abbreviation["attr"]:
                        parsed[die.offset] = abbreviation

                    if die.has_children:
                        die_depth += 1

            # Publish only after the whole file was parsed successfully.
            self.dwarf_info = parsed

        return self.dwarf_info

    # Private methods

    def _get_attribute_description(self, attr, die):
        """ Use regex to parse attribute description (value)

        :return: ``None`` for attributes that are not of interest; an int
                 for type, location, member-location and byte-size
                 attributes whose text matches; otherwise the right-stripped
                 text
        """
        pattern = self._ATTRIBUTE_PATTERNS.get(attr.name)
        if pattern is None:
            # Not an attribute of interest: no need to describe it at all.
            return None

        description = describe_attr_value(attr, die, 0)
        match = pattern.search(description)
        if match is None:
            return description.rstrip()

        groups = match.groups()
        if attr.name in self._HEX_ATTRIBUTES:
            return int(groups[0].rstrip(), 16)
        if attr.name in self._DECIMAL_ATTRIBUTES:
            return int(groups[0].rstrip())

        # DW_AT_name: the pattern has two alternatives and only the one that
        # matched carries a captured group.
        return next(group for group in groups if group is not None).rstrip()
Esempio n. 38
0
                file_entry = line_program['file_entry'][
                    die.attributes['DW_AT_decl_file'].value - 1]
                dir_entry = line_program['include_directory'][
                    file_entry.dir_index - 1]
                file = path.join(dir_entry, file_entry.name)

                if not type or any(t == type for t in variable_types(cu, die)):
                    yield (name, file, die.attributes['DW_AT_decl_line'].value)


# Command-line entry point: print the constants, types and variables found in
# the ELF file given as first argument.
if __name__ == "__main__":
    from elftools.elf.elffile import ELFFile
    import sys

    # print() calls instead of Python 2 print statements, which are a syntax
    # error under Python 3. The stream is not named ``file`` so the Python 2
    # builtin of that name is not shadowed.
    with open(sys.argv[1], 'rb') as stream:
        elf = ELFFile(stream)

        print("Constants:")
        for k, v in find_constants(elf):
            print('%s = %s' % (k, v))
        print()

        print("Types:")
        for k in find_types(elf):
            print(k)
        print()

        print("Variables:")
        for k, f, l in find_variables(elf):
            print('%s = %s:%s' % (k, f, l))
        print()
Esempio n. 39
0
    code = ''

    while i != (len(bytes) + 1):
        code += bytes.hex()[x:y]
        if (isPrime(i)):
            #code += random()
            code += "FF"
        x = y
        y += 2
        i += 1
    return code


# Script body: read the .text section of the ELF given on the command line
# and print it as shellcode, both unchanged and after insertion() processed it.
path = sys.argv[1]
# The context manager closes the file as soon as the section bytes are read;
# nothing below touches the ELF object again.
with open(path, 'rb') as f:  # read binary
    elf = ELFFile(f)  # map file as ELF
    code = elf.get_section_by_name('.text')  # extract .text section
    ops = code.data()  # get data from text

payload = insertion(ops)  # insert random values if prime number
original_xcode, original_ncode = (shellcode(ops.hex())
                                  )  # returns payload with \ and ,
prime_xcode, prime_ncode = (shellcode(payload))  # returns payload with \ and ,

# The length is the string length divided by 4, using integer division
original_lcode = len(str(original_xcode)) // 4  # returns length shellcode
prime_lcode = len(str(prime_xcode)) // 4  # returns length shellcode
print("Original shellcode length:", original_lcode, hex(original_lcode))
print("Original opcodes:", original_xcode, "\n", original_ncode)
print()
print("Prime shellcode length:", prime_lcode, hex(prime_lcode))
print("Prime opcodes:", prime_xcode, "\n", prime_ncode)
Esempio n. 40
0
class ELFBinaryFile(object):
    """! @brief An ELF binary executable file.

    Inspects an ELF file and exposes three lists: the section objects, the
    used ranges of memory, and the unused ranges of memory.

    One ELFSection object is created per section of type `SHT_PROGBITS` or
    `SHT_NOBITS` that also has at least one of the `SHF_WRITE`, `SHF_ALLOC`
    or `SHF_EXECINSTR` flags set.

    Used and unused ranges are derived by comparing those sections with the
    regions of the memory map. Only sections lying completely inside a region
    are taken into account.
    """
    def __init__(self, elf, memory_map=None):
        self._owns_file = False
        # A path is opened here (and closed by this object); anything else is
        # used as an already open file object owned by the caller.
        opened_here = isinstance(elf, six.string_types)
        self._file = open(elf, 'rb') if opened_here else elf
        self._owns_file = opened_here
        self._elf = ELFFile(self._file)
        self._memory_map = memory_map or MemoryMap()

        # Decoders are created on first access.
        self._symbol_decoder = None
        self._address_decoder = None

        self._extract_sections()
        self._compute_regions()

    def __del__(self):
        """! @brief Close the ELF file if it is owned by this instance."""
        if getattr(self, '_owns_file', False):
            self.close()

    def _extract_sections(self):
        """! Get list of interesting sections."""
        self._sections = []
        for section in self._elf.iter_sections():
            # Keep only the section types of interest...
            if section['sh_type'] in ('SHT_PROGBITS', 'SHT_NOBITS'):
                relevant = (SH_FLAGS.SHF_WRITE | SH_FLAGS.SHF_ALLOC
                            | SH_FLAGS.SHF_EXECINSTR)
                # ...that carry at least one of the relevant flags.
                if section['sh_flags'] & relevant != 0:
                    self._sections.append(ELFSection(self, section))
        self._sections.sort(key=lambda item: item.start)

    def _dump_sections(self):
        row = "{0:<20} {1:<25} {2:<10} {3:<10}"
        for section in self._sections:
            print(row.format(
                section.name, section.flags_description,
                hex(section.start), hex(section.length)))

    def _compute_regions(self):
        occupied = []
        free = []
        for region in self._memory_map:
            cursor = region.start
            for section in self._sections:
                begin, size = section.start, section.length

                # Only sections inside this memory region are considered.
                if region.contains_range(begin, length=size):
                    occupied.append(
                        MemoryRange(start=begin, length=size, region=region))

                    # The gap in front of the section, if any, is unused.
                    if begin > cursor:
                        free.append(
                            MemoryRange(start=cursor,
                                        length=(begin - cursor),
                                        region=region))

                    cursor = begin + size

            # Whatever is left up to the end of the region is unused too.
            if region.end > cursor:
                free.append(
                    MemoryRange(start=cursor, end=region.end, region=region))
        self._used = occupied
        self._unused = free

    def close(self):
        self._file.close()
        self._owns_file = False

    def read(self, addr, size):
        """! @brief Read program data from the elf file.

        @param addr Physical address (load address) to read from.
        @param size Number of bytes to read.
        @return Requested data or None if address is unmapped.
        """
        last = addr + size
        for segment in self._elf.iter_segments():
            seg_first = segment["p_paddr"]
            seg_last = seg_first + min(segment["p_memsz"],
                                       segment["p_filesz"])

            overlaps = addr < seg_last and last > seg_first
            contained = addr >= seg_first and last <= seg_last
            if overlaps and contained:
                # The request lies fully inside this segment.
                offset = addr - seg_first
                return segment.data()[offset:offset + size]

    @property
    def sections(self):
        """! @brief Access the list of sections in the ELF file.
        @return A list of ELFSection objects sorted by start address.
        """
        return self._sections

    @property
    def used_ranges(self):
        """! @brief Access the list of used ranges of memory in the ELF file.
        @return A list of MemoryRange objects sorted by start address.
        """
        return self._used

    @property
    def unused_ranges(self):
        """! @brief Access the list of unused ranges of memory in the ELF file.
        @return A list of MemoryRange objects sorted by start address.
        """
        return self._unused

    @property
    def symbol_decoder(self):
        decoder = self._symbol_decoder
        if decoder is None:
            decoder = self._symbol_decoder = ElfSymbolDecoder(self._elf)
        return decoder

    @property
    def address_decoder(self):
        decoder = self._address_decoder
        if decoder is None:
            decoder = self._address_decoder = DwarfAddressDecoder(self._elf)
        return decoder
Esempio n. 41
0
class DWARFCore(object):
    """DWARF core handler class.

    Parses the DWARF debug information of an ELF binary and fills the
    ``DWARFCache`` tables with its compile units, DIEs, types, subprograms
    and global variables.
    """

    def __init__(self, in_file):
        """Load the DWARF data of ``in_file`` into the ``DWARFCache`` tables.

        When pyelftools cannot be imported, when ``in_file`` is not an ELF
        binary, or when no DWARF info is returned, ``self._dwarf_info`` stays
        ``None`` and nothing is cached.

        :param in_file: path of the binary to inspect.
        """
        # Define every attribute up front so that each early-exit path below
        # leaves the object in the same, well-defined "no DWARF" state.
        self._felf = None
        self._arch = None
        self._dwarf_info = None

        try:
            from elftools.elf.elffile import ELFFile
        except ImportError:
            # pyelftools is optional; without it there is nothing to load.
            return

        if not _is_ELF_file(in_file):
            print("{} is not an ELF-format binary".format(in_file))
            return

        # self._felf is built on top of this stream, so it is not closed here.
        f = open(in_file, 'rb')
        self._felf = ELFFile(f)
        self._arch = self._felf.get_machine_arch()
        self._dwarf_info = self._felf.get_dwarf_info()
        if self._dwarf_info is None:
            return

        # Load die to the die cache
        for cu in self._dwarf_info.iter_CUs():
            top_die = cu.get_top_DIE()
            DWARFCache._dw_cu_cache[cu.cu_offset] = DWCompileUnit(top_die)
            for die in cu.iter_DIEs():
                DWARFCache._offset_to_die[die.offset] = die

        self._load_types()
        self._load_subprograms()
        self._load_globalvars()

    def _process_dies(self, die, fn):
        """Call ``fn`` on ``die`` and then, recursively, on all of its descendants."""
        fn(die)
        for child in die.iter_children():
            self._process_dies(child, fn)

    def _load_types(self):
        """Run ``DWARFCache._process_types`` over every DIE of every compile unit."""
        for cu in self._dwarf_info.iter_CUs():
            top = cu.get_top_DIE()
            self._process_dies(top, DWARFCache._process_types)

    def _load_subprograms(self):
        """Cache a ``DWFunction`` for each subprogram DIE, keyed by its address."""
        for cu in self._dwarf_info.iter_CUs():
            for die in cu.iter_DIEs():
                if is_subprogram(die):
                    func = DWFunction(die)
                    DWARFCache._dw_subprogram_cache[func.address] = func

    def _load_globalvars(self):
        """Cache a ``DWVariable`` for compile-unit level variables, keyed by address.

        Only ``DW_TAG_variable`` DIEs whose parent is a ``DW_TAG_compile_unit``
        and that do not carry a truthy ``DW_AT_external`` attribute are stored.
        """
        for cu in self._dwarf_info.iter_CUs():
            for die in cu.iter_DIEs():
                if die.tag != "DW_TAG_variable":
                    continue
                if die.get_parent().tag != "DW_TAG_compile_unit":
                    continue

                if not die.attributes.get("DW_AT_external"):
                    gvar = DWVariable(die)
                    DWARFCache._dw_global_var_cache[gvar.address] = gvar

    def get_function(self, address):
        """Get the dwarf function for an address, or None if none is cached."""
        try:
            return DWARFCache._dw_subprogram_cache[address]
        except KeyError:
            return None

    def get_global_variable(self, address):
        """Get the dwarf global variable for an address, or None if none is cached."""
        try:
            return DWARFCache._dw_global_var_cache[address]
        except KeyError:
            return None
Esempio n. 42
0
        return 2
    elif name == '.rodata':
        return 3
    elif name == '.bss': # TODO is this actually a thing? It doesn't fit in 2 bits and why would there be a relocation in .bss?
        return 4
    else:
        assert False, 'Unrecognized section for relocation: ' + name

# Script entry point: read an ELF object file and start emitting overlay
# relocation info for it into the output file.
if __name__ == '__main__':
    # Two positional arguments: the input object file and the output path.
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='Input object file to create overlay info', metavar='input')
    parser.add_argument('output', help='Overlay info output', metavar='output')
    args = parser.parse_args()

    # The input is parsed as binary ELF; the output is written as text.
    with open(args.input, 'rb') as f, open(args.output, 'w') as out:
        elffile = ELFFile(f)

        # Section directive that opens the generated output.
        out.write('.section .ovl\n');

        relocs = []
        # Walk every relocation section of the object file.
        for section in elffile.iter_sections():
            if isinstance(section, RelocationSection):
                # sh_link of a relocation section is used here as the index of its symbol table.
                symtab = elffile.get_section(section['sh_link'])
                for reloc in section.iter_relocations():
                    symbol = symtab.get_symbol(reloc['r_info_sym'])
                    # Relocations against undefined symbols are skipped.
                    if symbol.entry['st_shndx'] != 'SHN_UNDEF':
                        # The first four characters of the relocation section name are dropped
                        # before the lookup (presumably the '.rel' prefix, e.g. '.rel.text' -> '.text').
                        section_id = get_section_type_from_name(section.name[4:])
                        relocation_type = reloc['r_info_type']
                        offset = reloc['r_offset']
                        # The offset has to fit in the low 24 bits of the packed word.
                        assert offset <= 0xFFFFFF, 'Object too big to convert into overlay'
                        # Packed layout: section id from bit 30 up, relocation type from bit 24, offset in bits 0-23.
                        word = (section_id << 30) | (relocation_type << 24) | (offset)
Esempio n. 43
0
def fixFunctionStartInGaps(bblLayout, binary):
    """Grow basic blocks backwards over small gaps that contain a function start.

    Function symbols inside ``.text`` are read from the ``.symtab`` of
    ``binary``. A function start that matches no basic block offset but falls
    in a gap between two consecutive blocks is handled as follows: when it is
    less than 0x20 bytes before the block that closes the gap, that block is
    extended (``offset`` lowered, ``bb_size`` raised) to begin at the function
    start.

    :param bblLayout: indexable collection of basic-block records with
        ``offset`` (relative to the start of ``.text``) and ``bb_size``
        attributes; modified in place.
    :param binary: path of the ELF file the layout belongs to.
    :return: ``bblLayout`` (unchanged when the binary has no ``.symtab`` or no
        ``.text`` section).
    """
    func_from_symbols = set()
    (text_start, text_end) = readSectionRange(binary, ".text")
    with open(binary, 'rb') as openFile:
        elf = ELFFile(openFile)
        sym_sec = elf.get_section_by_name('.symtab')
        if sym_sec is None:
            return bblLayout
        for sym in sym_sec.iter_symbols():
            addr = sym['st_value']
            if (sym.entry['st_info']['type'] == 'STT_FUNC'
                    and text_start <= addr < text_end):
                func_from_symbols.add(addr)

        # Address of the first section named .text, or None when there is none.
        text_base_addr = next(
            (sec['sh_addr'] for sec in elf.iter_sections()
             if sec.name == '.text'),
            None)
    # we only collect .text information for now
    if text_base_addr is None:
        logging.warning(
            "[fixFunctionStartInGaps]: This function only works with executable/libs!"
        )
        return bblLayout

    # offset -> size and offset -> index into bblLayout
    bb_dict_index = dict()
    bb_list = dict()
    for idx, bb in enumerate(bblLayout):
        bb_list[bb.offset] = bb.bb_size
        bb_dict_index[bb.offset] = idx

    sorted_bb_list = sorted(bb_list.items())

    # Collect (gap_start, next_block_offset) for every block whose end does not
    # coincide with the start of another block.
    range_not_included = list()
    for (bb_offset, size), (following_offset, _) in zip(sorted_bb_list,
                                                        sorted_bb_list[1:]):
        next_offset = bb_offset + size
        if next_offset not in bb_list:
            range_not_included.append((next_offset, following_offset))

    func_idx = 0
    for func in func_from_symbols:
        func_offset = func - text_base_addr
        if func_offset in bb_list:
            continue
        logging.warning("Function start#%d 0x%x not in bb list" %
                        (func_idx, func))
        func_idx += 1
        # check the if the function between a gap
        for (start, end) in range_not_included:
            if not (start <= func_offset < end):
                continue
            logging.debug(
                "[fixFunctionStartInGaps]: found function start 0x%x between gaps(0x%x - 0x%x)!"
                % (func, text_base_addr + start, text_base_addr + end))
            # we assume the handwritten .byte is not toooo much
            if end - func_offset < 0x20 and end in bb_dict_index:
                logging.debug(
                    "lalal, we find .byte(s) before function start!")
                idx = bb_dict_index[end]
                added_size = end - func_offset
                old_start = bblLayout[idx].offset + text_base_addr
                new_start = old_start - added_size
                bblLayout[idx].bb_size += added_size
                bblLayout[idx].offset -= added_size
                logging.debug(
                    "[fixFunctionStartInGaps]: change bb from 0x%x to 0x%x"
                    % (old_start, new_start))
    return bblLayout
Esempio n. 44
0
def get(host, flag_id, flag, vuln):
    """Checker "get" action: verify that a previously stored flag is still retrievable.

    ``flag_id`` is a JSON document holding the credentials (``u``/``p``), the
    file id (``fId``), its type (``fType``), its signature (``sign``) and the
    path of the local binary (``b``). The function logs in, checks that the
    downloaded file contains ``flag``, exercises the signature and invitation
    endpoints with a second, freshly registered user, and finishes with
    ``cquit(Status.OK)``.
    """
    mch = CheckMachine(host, PORT)

    info = json.loads(flag_id)
    user, password = info["u"], info["p"]
    file_id, file_type = info["fId"], info["fType"]
    signature, binary_path = info["sign"], info["b"]

    # The owner can still log in and download the flag file.
    owner_session = mch.login_user(user, password)
    downloaded = mch.download(owner_session, file_id)
    assert_in(flag.encode(), base64.b64decode(downloaded.encode()), "No flag in file", Status.CORRUPT)

    mch.check_info(owner_session, file_id, 0, 1, [
        {'res': file_type, 'sign': signature}
    ], True)

    # The .text section of the local binary is what the signatures are built from.
    with open(binary_path, "rb") as elf_stream:
        text_section = ELFFile(elf_stream).get_section_by_name(".text")
        text_bytes = text_section.data()
        text_size = text_section.data_size

    mch.check_get_signature(owner_session, text_size, text_bytes, file_id)

    # A second user can read signatures too.
    guest_user, guest_password = mch.register_user()
    guest_session = mch.login_user(guest_user, guest_password)

    mch.check_get_signature(guest_session, text_size, text_bytes, file_id)

    # Adding a signature before being invited is checked with the trailing False flag.
    mch.check_add_signature(
        guest_session,
        text_size,
        text_bytes,
        file_id,
        choice(["malware", "worm", "trojan", "virus"]),
        False
    )

    assert_in('session', guest_session.cookies, 'Invalid session')
    guest_uid = guest_session.cookies['session']

    mch.invite(owner_session, guest_uid, file_id)

    # After the invitation the same call is made without the False flag.
    guest_signature = mch.check_add_signature(
        guest_session,
        text_size,
        text_bytes,
        file_id,
        choice(["malware", "worm", "trojan", "virus"])
    )

    mch.check_info(guest_session, file_id, 1, 1, [
        {'res': file_type, 'sign': signature},
        {'res': 'virus', 'sign': guest_signature}
    ], True)

    public_listing = mch.list_no_auth()
    assert_in(file_id, public_listing, 'Could not find flag file')

    cquit(Status.OK)
Esempio n. 45
0
def isPIE(binary_path):
    """Return True when the ELF header type of ``binary_path`` is ``ET_DYN``.

    The decision depends on ``e_type`` alone, so files without any PT_LOAD
    segment are classified too instead of failing on a segment lookup whose
    result was never used.

    NOTE(review): ``ET_DYN`` is the only criterion checked here; confirm that
    callers do not need to tell shared libraries apart from PIE executables.
    """
    with open(binary_path, 'rb') as open_file:
        return ELFFile(open_file)['e_type'] == 'ET_DYN'
Esempio n. 46
0
def readElfClass(binary):
    """Return the ``elfclass`` value that ELFFile reports for the file at ``binary``."""
    with open(binary, 'rb') as elf_stream:
        return ELFFile(elf_stream).elfclass
Esempio n. 47
0
 def __init__(self, debug_filename):
     """Read the DWARF info of ``debug_filename`` into ``self.dwarfinfo``.

     An assertion fails when the file carries no DWARF info.
     """
     with open(debug_filename, 'rb') as stream:
         parsed = ELFFile(stream)
         assert parsed.has_dwarf_info(), debug_filename + ' has no DWARF info'
         self.dwarfinfo = parsed.get_dwarf_info()
Esempio n. 48
0
def pack_staticr_rel(elf_path, rel_path, orig_dir):
    """Assemble a REL file at ``rel_path`` from the ELF at ``elf_path``.

    Relocation tables are loaded from ``<orig_dir>/pal/rel``, the six ELF
    sections are copied into the ``Rel`` object, every ``R_PPC_REL24``
    relocation in ``.rela.text`` is patched to branch to a fixed address, and
    hard-coded section offsets and import entries are applied before the
    result is written out.
    """
    rel = Rel()

    # TODO hardcoded path
    rel_dir = Path(orig_dir) / "pal" / "rel"
    rel.load_reloc(0, rel_dir / "dol_rel.bin")
    rel.load_reloc(1, rel_dir / "rel_abs.bin")

    # Section names in the order of their REL section index (starting at 1).
    section_names = (".text", ".ctors", ".dtors", ".rodata", ".data", ".bss")

    with open(elf_path, "rb") as elf_stream:
        elf = ELFFile(elf_stream)

        for index, sec_name in enumerate(section_names, start=1):
            rel.section_info[index] = read_elf_sec(elf, sec_name)

        # .rodata padding hack
        rodata = rel.section_info[4]
        rodata.data = rodata.data[:-16]
        rodata.length = len(rodata.data)

        # Jump to _Unresolved
        unresolved_addr = 0x805553B0
        text = rel.section_info[1]
        rela_text = elf.get_section_by_name(".rela.text")
        if rela_text:
            for relocation in rela_text.iter_relocations():
                entry = relocation.entry
                if entry.r_info_type != R_PPC_REL24:
                    continue

                where = entry.r_offset
                (instruction,) = struct.unpack(">I",
                                               text.data[where:where + 4])
                delta = unresolved_addr - (0x805103B4 + where)
                # Keep everything outside the masked bits and replace the
                # masked bits with the new displacement.
                patched = (instruction & ~0x03FFFFFC) | (delta & 0x03FFFFFC)

                for i, byte in enumerate(struct.pack(">I", patched)):
                    text.data[where + i] = byte

    rel.section_info[1].executable = True
    section_offsets = (
        (1, 0xD4),
        (2, 0x37F120),
        (3, 0x37F424),
        (4, 0x37F440),
        (5, 0x3A28F0),
        (6, 0),
    )
    for index, file_offset in section_offsets:
        rel.section_info[index].offset = file_offset

    # This seems to affect tz, but absolutely no one else?
    # Hack: Fix .bss size
    rel.section_info[6].length = 0x78B0

    first_imp, second_imp = rel.imps[0], rel.imps[1]
    first_imp.moduleId = 1
    first_imp.offset = 0x3CD104
    second_imp.moduleId = 0
    second_imp.offset = 0x4820F4

    with open(rel_path, "wb") as out_file:
        rel.reconstruct(out_file)
Esempio n. 49
0
    def prepare_for_injection(self):
        """
        Make room for INJECTION_SIZE bytes of injected code at the end of the
        executable LOAD segment.

        Works on a copy of self.binary: the ELF header, program headers and
        section headers are rewritten so that everything located after the
        last section of the executable segment is shifted by INJECTION_SIZE,
        then the padding bytes are spliced in. On success self.binary and
        self.helper are replaced and self.next_injection_offset /
        self.next_injection_vaddr point at the start of the new space.

        Returns True on success, False (leaving self.binary untouched) when no
        executable PT_LOAD segment is found.

        Derived from http://vxheavens.com/lib/vsc01.html
        """
        # All edits go to this private copy; self.binary is only replaced at the very end.
        modified = StringIO(self.binary.getvalue())

        # Add INJECTION_SIZE to the section header list offset to make room for our injected code
        elf_hdr = self.helper.header.copy()
        elf_hdr.e_shoff += INJECTION_SIZE
        logging.debug('Changing e_shoff to {}'.format(elf_hdr.e_shoff))

        # The ELF header is written at offset 0 of the copy.
        modified.seek(0)
        modified.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Find the main RX LOAD segment and also adjust other segment offsets along the way
        executable_segment = None

        for segment_idx, segment in enumerate(self.helper.iter_segments()):
            segment_hdr = segment.header.copy()
            segment_hdr_offset = self.helper._segment_offset(segment_idx)

            if executable_segment is not None:
                # Already past the executable segment, so just update the offset if needed (i.e. don't update things
                # that come before the expanded section)
                # last_exec_section is always bound here: it is assigned in the same branch that sets
                # executable_segment.
                if segment_hdr.p_offset > last_exec_section['sh_offset']:
                    segment_hdr.p_offset += INJECTION_SIZE

            elif segment['p_type'] == 'PT_LOAD' and segment[
                    'p_flags'] & P_FLAGS.PF_X:
                # Found the executable LOAD segment.
                # Make room for our injected code.

                logging.debug(
                    'Found executable LOAD segment at index {}'.format(
                        segment_idx))
                executable_segment = segment

                # Highest section index that section_in_segment() reports as part of the executable segment.
                last_exec_section_idx = max([
                    idx for idx in range(self.helper.num_sections())
                    if executable_segment.section_in_segment(
                        self.helper.get_section(idx))
                ])
                last_exec_section = self.helper.get_section(
                    last_exec_section_idx)

                # The segment becomes RWX and grows by the injected size both in the file and in memory.
                segment_hdr.p_flags |= P_FLAGS.PF_X | P_FLAGS.PF_W | P_FLAGS.PF_R
                segment_hdr.p_filesz += INJECTION_SIZE
                segment_hdr.p_memsz += INJECTION_SIZE

                logging.debug(
                    'Rewriting segment filesize and memsize to {} and {}'.
                    format(segment_hdr.p_filesz, segment_hdr.p_memsz))

            # Every program header is written back, whether or not it was changed above.
            modified.seek(segment_hdr_offset)
            modified.write(self.helper.structs.Elf_Phdr.build(segment_hdr))

        if executable_segment is None:
            logging.error(
                "Could not locate an executable LOAD segment. Cannot continue injection."
            )
            return False

        logging.debug(
            'Last section in executable LOAD segment is at index {} ({})'.
            format(last_exec_section_idx, last_exec_section.name))

        # The injected bytes start right after the current end of that section (file offset and address).
        self.next_injection_offset = last_exec_section[
            'sh_offset'] + last_exec_section['sh_size']
        self.next_injection_vaddr = last_exec_section[
            'sh_addr'] + last_exec_section['sh_size']

        # Update sh_size for the section we grew
        section_header_offset = self.helper._section_offset(
            last_exec_section_idx)
        section_header = last_exec_section.header.copy()

        # NOTE(review): this sets an attribute named `pflags` on the copied section header; whether
        # Elf_Shdr.build() takes it into account is not visible from this method - confirm.
        section_header.pflags = P_FLAGS.PF_R | P_FLAGS.PF_W | P_FLAGS.PF_X  # Hack to make it so we can RWX the page
        section_header.sh_size += INJECTION_SIZE

        modified.seek(section_header_offset)
        modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # Update sh_offset for each section past the last section in the executable segment
        for section_idx in range(last_exec_section_idx + 1,
                                 self.helper.num_sections()):
            section_header_offset = self.helper._section_offset(section_idx)
            section_header = self.helper.get_section(section_idx).header.copy()

            section_header.sh_offset += INJECTION_SIZE
            logging.debug('Rewriting section {}\'s offset to {}'.format(
                section_idx, section_header.sh_offset))

            modified.seek(section_header_offset)
            modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # TODO: Architecture-specific padding
        # Should be something that won't immediately crash, but can be caught (e.g. SIGTRAP on x86)
        # Splice INJECTION_SIZE padding characters into the copy at the injection offset.
        modified = StringIO(modified.getvalue()[:self.next_injection_offset] +
                            '\xCC' * INJECTION_SIZE +
                            modified.getvalue()[self.next_injection_offset:])

        # Switch over to the enlarged image and re-parse it.
        self.binary = modified
        self.helper = ELFFile(self.binary)

        return True
Esempio n. 50
0
def main( kdts_data_path, executable_path, flagged_slices_path, flagging_policy ):
    """Render a call graph of the recorded callstacks to ``callgraph.png``.

    Vertices are the functions appearing in the translated callstacks and
    edges are caller/callee pairs weighted by their normalized frequency.
    Functions and pairs that occur in flagged slices are highlighted.

    :param kdts_data_path: pickle file mapping slice index to a record with a
        ``"callstack"`` entry.
    :param executable_path: ELF executable used to validate and translate the
        callstack addresses.
    :param flagged_slices_path: pickle file mapping flagging policy to the
        flagged slice indices.
    :param flagging_policy: policy to use when the file holds several; may be
        None when it holds exactly one.
    :raises RuntimeError: when several policies are available and none was
        selected.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only feed this
    # function files from a trusted source.
    with open( kdts_data_path, "rb" ) as kdts_infile:
        kdts = pkl.load( kdts_infile )
        slice_to_callstacks = { k:v["callstack"] for k,v in kdts.items() }

    with open( executable_path, "rb" ) as executable_infile:
        elf_file = ELFFile( executable_infile )
        validate_executable( elf_file )

    with open( flagged_slices_path, "rb" ) as infile:
        policy_to_flagged_indices = pkl.load( infile )
        if len( policy_to_flagged_indices ) == 1:
            policy = list(policy_to_flagged_indices.keys())[0]
            flagged_slice_indices = policy_to_flagged_indices[policy]
        elif flagging_policy is not None:
            flagged_slice_indices = policy_to_flagged_indices[flagging_policy]
        else:
            policies = policy_to_flagged_indices.keys()
            raise RuntimeError("Multiple flagging policies provided but none selected. Pick one of: {}".format(policies))

    # Unpack data
    # NOTE(review): the two assignments below hard-code the slices to plot and
    # override the indices loaded above; they look like a debugging leftover.
    #slice_indices = sorted(slice_to_callstacks.keys())
    slice_indices = [0, 5]
    flagged_slice_indices = [5]

    callstack_to_count = get_callstack_to_count( slice_indices, slice_to_callstacks )
    translated_callstack_to_count, address_to_translation = translate_callstacks( callstack_to_count, executable_path )
    call_set = get_call_set( translated_callstack_to_count )
    pair_to_count = get_caller_callee_pairs( translated_callstack_to_count )
    pair_to_freq = normalize_counts( pair_to_count )

    # Unpack flagged data
    # The flagged callstacks (not the full set) are translated here, so that
    # only calls and pairs from flagged slices end up highlighted.
    flagged_callstack_to_count = get_callstack_to_count( flagged_slice_indices, slice_to_callstacks )
    translated_flagged_callstack_to_count, address_to_translation = translate_callstacks( flagged_callstack_to_count, executable_path, address_to_translation )
    flagged_call_set = get_call_set( translated_flagged_callstack_to_count )
    flagged_pair_to_count = get_caller_callee_pairs( translated_flagged_callstack_to_count )

    # Set up to build callgraph
    vid_to_call = dict( enumerate( sorted(call_set) ) )
    call_to_vid = { call:vid for vid,call in vid_to_call.items() }
    eid_to_pair = dict( enumerate( sorted(pair_to_count.keys()) ) )

    # Build callgraph
    callgraph = igraph.Graph(directed=True)
    callgraph.add_vertices( len(vid_to_call.keys()) )
    for i in range(len(callgraph.vs[:])):
        v = callgraph.vs[i]
        v["function"] = vid_to_call[i]
        if vid_to_call[i] in flagged_call_set:
            v["color"] = "lightcoral"
        else:
            v["color"] = "lightgrey"
    edges = []
    edge_weights = []
    edge_colors = []
    for pair in eid_to_pair.values():
        caller, callee = pair
        edges.append( [ call_to_vid[caller], call_to_vid[callee] ] )
        edge_weights.append( pair_to_freq[ pair ] )
        edge_colors.append( "red" if pair in flagged_pair_to_count else "dimgrey" )
    callgraph.add_edges( edges )
    callgraph.es[:]["frequency"] = edge_weights
    callgraph.es[:]["color"] = edge_colors

    # Configure plot appearance
    layout = callgraph.layout_sugiyama()
    n_vertices = len(callgraph.vs[:])
    vertex_label_distances = [ 2 ] * n_vertices
    edge_width_scale = 2
    edge_widths = [ ew*edge_width_scale for ew in callgraph.es[:]["frequency"] ]
    save_path = "callgraph.png"

    # Make plot
    igraph.plot( callgraph,
                 bbox=(2000, 1000),
                 layout = layout,
                 vertex_color = callgraph.vs[:]["color"],
                 vertex_label = callgraph.vs[:]["function"],
                 vertex_label_dist = vertex_label_distances,
                 edge_width = edge_widths,
                 edge_color = callgraph.es[:]["color"],
                 margin=80,
                 target = save_path )
Esempio n. 51
0
 def __init__(self, fname):
     """Open ``fname`` and parse it as an ELF file.

     The file stays open in ``self.fd`` for as long as ``self.elffile`` is in
     use. If parsing fails the file is closed again before the error is
     re-raised, so no descriptor is leaked.
     """
     self.fd = open(fname, 'rb')
     try:
         self.elffile = ELFFile(self.fd)
     except Exception:
         self.fd.close()
         raise
     self.container = Container()
Esempio n. 52
0
class ELFExecutable(BaseExecutable):
    def __init__(self, file_path):
        """Parse the ELF file at ``file_path`` and cache frequently used facts about it.

        After the base-class initialisation (which presumably provides
        ``self.binary`` -- confirm against BaseExecutable) this sets the
        architecture, struct packing characters, section list, the executable
        LOAD segment and the list of needed libraries.

        Raises Exception when the machine architecture is not recognized, and
        IndexError when the file has no executable PT_LOAD segment.
        """
        super(ELFExecutable, self).__init__(file_path)

        self.helper = ELFFile(self.binary)

        self.architecture = self._identify_arch()

        if self.architecture is None:
            raise Exception('Architecture is not recognized')

        logging.debug('Initialized {} {} with file \'{}\''.format(
            self.architecture,
            type(self).__name__, file_path))

        # struct format characters for byte order and pointer width
        self.pack_endianness = '<' if self.helper.little_endian else '>'
        self.address_pack_type = 'I' if self.helper.elfclass == 32 else 'Q'

        self.sections = [
            section_from_elf_section(s) for s in self.helper.iter_sections()
        ]

        # First PT_LOAD segment with flag bit 0x1 (the execute bit, PF_X) set.
        self.executable_segment = [
            s for s in self.helper.iter_segments()
            if s['p_type'] == 'PT_LOAD' and s['p_flags'] & 0x1
        ][0]

        dyn = self.helper.get_section_by_name('.dynamic')
        if dyn:
            self.libraries = [
                t.needed for t in dyn.iter_tags() if t['d_tag'] == 'DT_NEEDED'
            ]
        elif not hasattr(self, 'libraries'):
            # Binaries without a .dynamic section need no libraries; make sure
            # the attribute exists without overriding a base-class value.
            self.libraries = []

        self.next_injection_offset = None

    def _identify_arch(self):
        """Translate the ELF machine string into an architecture identifier.

        Returns the matching ARCHITECTURE member for 'x86', 'x64' and 'ARM',
        the string 'ARM64' for 'AArch64', and None for anything else.
        """
        machine = self.helper.get_machine_arch()

        if machine == 'x86':
            return ARCHITECTURE.X86
        if machine == 'x64':
            return ARCHITECTURE.X86_64
        if machine == 'ARM':
            return ARCHITECTURE.ARM
        if machine == 'AArch64':
            #return ARCHITECTURE.ARM_64
            return 'ARM64'
        return None

    def entry_point(self):
        """Return the ``e_entry`` value of the parsed ELF header."""
        entry = self.helper['e_entry']
        return entry

    def executable_segment_vaddr(self):
        """Return the ``p_vaddr`` value of the executable segment."""
        vaddr = self.executable_segment['p_vaddr']
        return vaddr

    def executable_segment_size(self):
        """Return the ``p_memsz`` value of the executable segment."""
        # TODO: Maybe limit this because we use this as part of our injection method?
        mem_size = self.executable_segment['p_memsz']
        return mem_size

    def iter_string_sections(self):
        """Yield, in stored order, every section named .rodata, .data or .bss."""
        wanted = ['.rodata', '.data', '.bss']
        for section in self.sections:
            if section.name not in wanted:
                continue
            yield section

    def _extract_symbol_table(self):
        """Populate ``self.functions`` (keyed by virtual address) from the ELF symbol data.

        Three sources are used, in this order:

        1. The PLT relocations (``.rela.plt`` on 64-bit, ``.rel.plt`` otherwise):
           one ``<name>@PLT`` function per relocation.
        2. ``.dynsym``: STT_FUNC symbols with a non-zero size that are not known yet.
        3. ``.symtab``: STT_FUNC symbols located in sections of the executable
           segment; symbols without a size get one computed from the next
           symbol (or the section end).
        """
        # Add in symbols from the PLT/rela.plt
        # .rela.plt contains indexes to reference both .dynsym (symbol names) and .plt (jumps to GOT)
        reloc_name = '.rela.plt' if self.is_64_bit() else '.rel.plt'
        reloc_section = self.helper.get_section_by_name(reloc_name)

        if reloc_section:
            dynsym = self.helper.get_section(
                reloc_section['sh_link'])  # .dynsym
            if isinstance(dynsym, SymbolTableSection):
                plt = self.helper.get_section_by_name('.plt')
                if plt is None:
                    logging.debug(
                        'Found PLT relocations but no .plt section. Not adding PLT functions...'
                    )
                else:
                    # While sh_entsize is sometimes defined, it appears to be incorrect in some cases so we just
                    # ignore that and calculate it based off of the total size / num_relocations (plus the
                    # trampoline entity). Floor division keeps sizes and addresses integral on Python 3.
                    entsize = (plt['sh_size'] //
                               (reloc_section.num_relocations() + 1))

                    for idx, reloc in enumerate(
                            reloc_section.iter_relocations()):
                        # Get the symbol's name from dynsym
                        symbol_name = dynsym.get_symbol(
                            reloc['r_info_sym']).name

                        # The address of this function in the PLT is the base PLT offset + the index of the
                        # relocation. However, since there is the extra "trampoline" entity at the top of the
                        # PLT, we need to add one to the index to account for it.
                        plt_addr = plt['sh_addr'] + ((idx + 1) * entsize)

                        logging.debug(
                            'Directly adding PLT function {} at vaddr {}'.
                            format(symbol_name, hex(plt_addr)))

                        f = Function(plt_addr,
                                     entsize,
                                     symbol_name + '@PLT',
                                     self,
                                     type=Function.DYNAMIC_FUNC)
                        self.functions[plt_addr] = f
            else:
                logging.debug(
                    '.rel(a).plt section had sh_link to {}. Not parsing symbols...'
                    .format(dynsym))

        dynsym_section = self.helper.get_section_by_name('.dynsym')
        if dynsym_section:
            for symbol in dynsym_section.iter_symbols():
                entry = symbol.entry
                if entry['st_info']['type'] == 'STT_FUNC' and entry[
                        'st_size'] > 0:
                    vaddr = entry['st_value']
                    if vaddr not in self.functions:
                        logging.debug(
                            'Adding function from .dynsym directly at vaddr {}'
                            .format(vaddr))
                        f = Function(vaddr,
                                     entry['st_size'],
                                     symbol.name,
                                     self,
                                     type=Function.DYNAMIC_FUNC)
                        self.functions[vaddr] = f

        # Some things in the symtab have st_size = 0 which confuses analysis later on. To solve this, we keep track of
        # where each address is in the `function_vaddrs` set and go back after all symbols have been iterated to compute
        # size by taking the difference between the current address and the next recorded address.

        # We do this for each executable section so that the produced functions cannot span multiple sections.

        # The symbol table is the same for every section, so look it up once.
        symbol_table = self.helper.get_section_by_name('.symtab')

        for section in self.helper.iter_sections():
            if not self.executable_segment.section_in_segment(section):
                continue

            section_start = section['sh_addr']
            section_end = section_start + section['sh_size']

            name_for_addr = {}
            # Seeding with the section end gives the last symbol an upper bound.
            function_vaddrs = set([section_end])

            if symbol_table:
                for symbol in symbol_table.iter_symbols():
                    if symbol['st_info']['type'] != 'STT_FUNC' or symbol[
                            'st_shndx'] == 'SHN_UNDEF':
                        continue
                    if not (section_start <= symbol['st_value'] < section_end):
                        continue

                    name_for_addr[symbol['st_value']] = symbol.name
                    function_vaddrs.add(symbol['st_value'])

                    if symbol['st_size']:
                        logging.debug(
                            'Eagerly adding function {} from .symtab at vaddr {} with size {}'
                            .format(symbol.name, hex(symbol['st_value']),
                                    hex(symbol['st_size'])))
                        f = Function(symbol['st_value'], symbol['st_size'],
                                     symbol.name, self)
                        self.functions[symbol['st_value']] = f

            function_vaddrs = sorted(function_vaddrs)

            for cur_addr, next_addr in zip(function_vaddrs[:-1],
                                           function_vaddrs[1:]):
                # If st_size was set, we already added the function above, so don't add it again.
                if cur_addr not in self.functions:
                    func_name = name_for_addr[cur_addr]
                    size = next_addr - cur_addr
                    logging.debug(
                        'Lazily adding function {} from .symtab at vaddr {} with size {}'
                        .format(func_name, hex(cur_addr), hex(size)))
                    f = Function(cur_addr,
                                 size,
                                 func_name,
                                 self,
                                 type=Function.DYNAMIC_FUNC)
                    self.functions[cur_addr] = f

        # TODO: Automatically find and label main from call to libc_start_main

    def prepare_for_injection(self):
        """
        Derived from http://vxheavens.com/lib/vsc01.html
        """
        modified = StringIO(self.binary.getvalue())

        # Add INJECTION_SIZE to the section header list offset to make room for our injected code
        elf_hdr = self.helper.header.copy()
        elf_hdr.e_shoff += INJECTION_SIZE
        logging.debug('Changing e_shoff to {}'.format(elf_hdr.e_shoff))

        modified.seek(0)
        modified.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Find the main RX LOAD segment and also adjust other segment offsets along the way
        executable_segment = None

        for segment_idx, segment in enumerate(self.helper.iter_segments()):
            segment_hdr = segment.header.copy()
            segment_hdr_offset = self.helper._segment_offset(segment_idx)

            if executable_segment is not None:
                # Already past the executable segment, so just update the offset if needed (i.e. don't update things
                # that come before the expanded section)
                if segment_hdr.p_offset > last_exec_section['sh_offset']:
                    segment_hdr.p_offset += INJECTION_SIZE

            elif segment['p_type'] == 'PT_LOAD' and segment[
                    'p_flags'] & P_FLAGS.PF_X:
                # Found the executable LOAD segment.
                # Make room for our injected code.

                logging.debug(
                    'Found executable LOAD segment at index {}'.format(
                        segment_idx))
                executable_segment = segment

                last_exec_section_idx = max([
                    idx for idx in range(self.helper.num_sections())
                    if executable_segment.section_in_segment(
                        self.helper.get_section(idx))
                ])
                last_exec_section = self.helper.get_section(
                    last_exec_section_idx)

                segment_hdr.p_flags |= P_FLAGS.PF_X | P_FLAGS.PF_W | P_FLAGS.PF_R
                segment_hdr.p_filesz += INJECTION_SIZE
                segment_hdr.p_memsz += INJECTION_SIZE

                logging.debug(
                    'Rewriting segment filesize and memsize to {} and {}'.
                    format(segment_hdr.p_filesz, segment_hdr.p_memsz))

            modified.seek(segment_hdr_offset)
            modified.write(self.helper.structs.Elf_Phdr.build(segment_hdr))

        if executable_segment is None:
            logging.error(
                "Could not locate an executable LOAD segment. Cannot continue injection."
            )
            return False

        logging.debug(
            'Last section in executable LOAD segment is at index {} ({})'.
            format(last_exec_section_idx, last_exec_section.name))

        self.next_injection_offset = last_exec_section[
            'sh_offset'] + last_exec_section['sh_size']
        self.next_injection_vaddr = last_exec_section[
            'sh_addr'] + last_exec_section['sh_size']

        # Update sh_size for the section we grew
        section_header_offset = self.helper._section_offset(
            last_exec_section_idx)
        section_header = last_exec_section.header.copy()

        section_header.pflags = P_FLAGS.PF_R | P_FLAGS.PF_W | P_FLAGS.PF_X  # Hack to make it so we can RWX the page
        section_header.sh_size += INJECTION_SIZE

        modified.seek(section_header_offset)
        modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # Update sh_offset for each section past the last section in the executable segment
        for section_idx in range(last_exec_section_idx + 1,
                                 self.helper.num_sections()):
            section_header_offset = self.helper._section_offset(section_idx)
            section_header = self.helper.get_section(section_idx).header.copy()

            section_header.sh_offset += INJECTION_SIZE
            logging.debug('Rewriting section {}\'s offset to {}'.format(
                section_idx, section_header.sh_offset))

            modified.seek(section_header_offset)
            modified.write(self.helper.structs.Elf_Shdr.build(section_header))

        # TODO: Architecture-specific padding
        # Should be something that won't immediately crash, but can be caught (e.g. SIGTRAP on x86)
        modified = StringIO(modified.getvalue()[:self.next_injection_offset] +
                            '\xCC' * INJECTION_SIZE +
                            modified.getvalue()[self.next_injection_offset:])

        self.binary = modified
        self.helper = ELFFile(self.binary)

        return True

    def inject(self, asm, update_entry=False):
        """
        Write ``asm`` into the binary at the current injection cursor.

        The cursor is the pair ``self.next_injection_offset`` (file offset) and
        ``self.next_injection_vaddr`` (virtual address). If it has not been set
        up yet, ``prepare_for_injection()`` is called first. It is called again
        when the last section of the executable LOAD segment is too small to
        hold ``asm``. Afterwards the cursor is advanced by ``len(asm)``.

        :param asm: assembled machine code to write into the binary
        :param update_entry: when true, rewrite the ELF header so that
            ``e_entry`` points at the injected code
        :returns: the virtual address at which ``asm`` was placed
        :raises RuntimeError: if the binary has no executable PT_LOAD segment
        """
        if self.next_injection_offset is None or self.next_injection_vaddr is None:
            logging.warning(
                'prepare_for_injection() was not called before inject(). Calling now, but this may cause unexpected behavior'
            )
            self.prepare_for_injection()

        # The injection target is the highest-indexed section contained in the
        # first executable LOAD segment.
        for segment in self.helper.iter_segments():
            if segment['p_type'] == 'PT_LOAD' and segment[
                    'p_flags'] & P_FLAGS.PF_X:
                injection_section_idx = max(
                    i for i in range(self.helper.num_sections())
                    if segment.section_in_segment(self.helper.get_section(i)))
                break
        else:
            # Without this guard the lookup below died with an
            # UnboundLocalError that said nothing about the real problem.
            raise RuntimeError(
                'Could not locate an executable LOAD segment. Cannot inject.')

        injection_section = self.helper.get_section(injection_section_idx)

        # If we haven't injected code before or need to expand the section again for this injection, go ahead and
        # shift stuff around.
        section_end = injection_section['sh_offset'] + injection_section[
            'sh_size']
        if section_end < self.next_injection_offset + len(asm):
            logging.debug(
                'Automatically expanding injection section to accommodate for assembly'
            )

            # NOTE: Could this change the destination address for the code that gets injected?
            self.prepare_for_injection()
            injection_section = self.helper.get_section(injection_section_idx)

            # Everything before the trailing run of 0xCC padding counts as used.
            # NOTE(review): real code that happens to end in 0xCC bytes would be
            # treated as padding here and overwritten -- confirm this is acceptable.
            used_code_len = len(injection_section.data().rstrip('\xCC'))
            self.next_injection_offset = injection_section[
                'sh_offset'] + used_code_len
            self.next_injection_vaddr = injection_section[
                'sh_addr'] + used_code_len

        # "Inject" the assembly
        logging.debug('Injecting {} bytes of assembly at offset {}'.format(
            len(asm), self.next_injection_offset))
        self.binary.seek(self.next_injection_offset)
        self.binary.write(asm)

        # Update e_entry if requested
        if update_entry:
            logging.debug('Rewriting ELF entry address to {}'.format(
                self.next_injection_vaddr))
            elf_hdr = self.helper.header
            elf_hdr.e_entry = self.next_injection_vaddr

            self.binary.seek(0)
            self.binary.write(self.helper.structs.Elf_Ehdr.build(elf_hdr))

        # Re-parse so that self.helper reflects the bytes just written
        self.helper = ELFFile(self.binary)

        injected_vaddr = self.next_injection_vaddr
        self.next_injection_vaddr += len(asm)
        self.next_injection_offset += len(asm)

        return injected_vaddr
Esempio n. 53
0
from elftools.elf.elffile import ELFFile
from capstone import *

# Disassemble the `.text` section of ./chall.elf as 64-bit x86 code and print
# one "address, mnemonic, operands" line per instruction.
with open('./chall.elf', 'rb') as elf_stream:
    elf = ELFFile(elf_stream)
    text_section = elf.get_section_by_name('.text')
    machine_code = text_section.data()  # raw opcode bytes
    load_address = text_section['sh_addr']  # address of the first byte of `.text`
    disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
    for insn in disassembler.disasm(machine_code, load_address):
        print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")
Esempio n. 54
0
class Loader():
    def __init__(self, fname):
        """Open ``fname`` in binary mode, parse it as ELF and create an empty Container.

        The file object is kept in ``self.fd``; this method never closes it.
        """
        stream = open(fname, 'rb')
        self.fd = stream
        self.elffile = ELFFile(stream)
        self.container = Container()

    def load_functions(self, fnlist):
        """Add one Function to the container per entry of ``fnlist``.

        ``fnlist`` maps a function address to a dict providing "name", "sz"
        and "bind". The machine code of each function is sliced out of the
        `.text` section using the address relative to the section base.
        """
        text = self.elffile.get_section_by_name(".text")
        text_bytes = text.data()
        text_base = text['sh_addr']  # virtual address of the start of .text

        for address, info in fnlist.items():
            start = address - text_base
            code = text_bytes[start:start + info["sz"]]
            self.container.add_function(
                Function(info["name"], address, info["sz"], code,
                         info["bind"]))

    #long
    #linear disassembly
    def load_text(self):
        """Disassemble the whole `.text` section and record it in the container.

        For every decoded instruction this:
        - adds its address to ``container.inst_addrs_set``,
        - writes an "address, mnemonic, operands" line to ``/tmp/text.s``,
        - appends an InstructionWrapper around it to ``container.disa_list``.
        """
        section = self.elffile.get_section_by_name(".text")

        # Materialise the instruction stream exactly once. The instructions are
        # walked twice below; if disasm_bytes hands back a one-shot iterator
        # (the .plt.got handling in this class wraps it in list() as well),
        # a second pass over the raw result would see nothing and disa_list
        # would stay empty.
        entries = list(disasm_bytes(section.data(), section['sh_addr']))

        # `with` closes the listing even if a write fails part-way through
        with open('/tmp/text.s', 'w') as listing:
            for insn in entries:
                self.container.inst_addrs_set.add(insn.address)
                listing.write("0x%x:\t%s\t%s\n" %
                              (insn.address, insn.mnemonic, insn.op_str))

        for decoded in entries:
            self.container.disa_list.append(InstructionWrapper(decoded))

        #print('load_text')
    def load_data_sections(self, seclist, section_filter=lambda x: True):
        """Create a DataSection for every section name accepted by ``section_filter``.

        ``seclist`` maps a section name to a dict with (at least) the keys
        'base', 'sz' and 'align'. After the data sections are added, the PLT
        base and the disassembled `.plt.got` entries are stored on the
        container when those sections are present in ``seclist``.
        """
        selected = list(filter(section_filter, seclist))
        for name in selected:
            info = seclist[name]
            raw = self.elffile.get_section_by_name(name).data()
            contents = bytearray()

            if name == ".init_array":
                # The first 8 bytes of .init_array are dropped
                contents.extend(raw[8:] if len(raw) > 8 else b'')
            else:
                contents.extend(raw)
                # Zero-fill when the file holds fewer bytes than the section size
                missing = info['sz'] - len(contents)
                if missing > 0:
                    contents.extend(bytearray(missing))

            self.container.add_section(
                DataSection(name, info["base"], info["sz"], contents,
                            info['align']))

        # Find if there is a plt section
        for name in seclist:
            if name == '.plt':
                self.container.plt_base = seclist[name]['base']
            elif name == ".plt.got":
                gotplt = seclist[name]
                code = self.elffile.get_section_by_name(name).data()
                # disassemble with capstone, starting at the section base
                decoded = list(disasm_bytes(code, gotplt['base']))
                self.container.gotplt_base = gotplt['base']
                self.container.gotplt_sz = gotplt['sz']
                self.container.gotplt_entries = decoded

    def load_relocations(self, relocs):
        for reloc_section, relocations in relocs.items():
            section = reloc_section[5:]

            if reloc_section == ".rela.plt":
                self.container.add_plt_information(relocations)

            if section in self.container.sections:
                self.container.sections[section].add_relocations(relocations)
            else:
                print("[*] Relocations for a section that's not loaded:",
                      reloc_section)
                self.container.add_relocations(
                    section, relocations)  # add the .dyn section

    def reloc_list_from_symtab(self):
        """Collect all relocations, grouped by the name of their relocation section.

        Returns a defaultdict(list) whose values are lists of dicts with the
        keys 'name', 'st_value', 'offset', 'addend' and 'type'. 'name' and
        'st_value' are None when the relocation references no symbol.
        """
        grouped = defaultdict(list)

        reloc_sections = (sec for sec in self.elffile.iter_sections()
                          if isinstance(sec, RelocationSection))
        for section in reloc_sections:
            # sh_link holds the index of the symbol table the entries refer to
            symtable = self.elffile.get_section(section['sh_link'])

            for rel in section.iter_relocations():
                sym_index = rel['r_info_sym']
                symbol = None
                if sym_index != 0:
                    symbol = symtable.get_symbol(sym_index)

                if not symbol:
                    name = None
                    value = None
                elif symbol['st_name'] == 0:
                    # Unnamed symbol: use the name of the section it lives in
                    name = self.elffile.get_section(symbol['st_shndx']).name
                    value = symbol['st_value']
                else:
                    name = symbol.name
                    value = symbol['st_value']

                grouped[section.name].append({
                    'name': name,
                    'st_value': value,
                    'offset': rel['r_offset'],
                    'addend': rel['r_addend'],
                    'type': rel['r_info_type'],
                })

        return grouped

    def flist_from_symtab(self):
        """Collect the defined function symbols from every symbol table.

        A symbol is kept when its type is STT_FUNC and its section index is
        not SHN_UNDEF; hidden-visibility symbols are kept too. A debug record
        per kept symbol is written to ``/tmp/funcs.txt``.

        Returns a dict mapping the symbol's ``st_value`` to a dict with the
        keys 'name', 'sz', 'visibility' and 'bind'. When several symbols share
        an ``st_value`` the last one seen wins.
        """
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]

        function_list = dict()
        # `with` guarantees the dump file is closed even when reading a symbol
        # raises half-way through.
        with open('/tmp/funcs.txt', 'w') as dump:
            for section in symbol_tables:
                if section['sh_entsize'] == 0:
                    continue

                for symbol in section.iter_symbols():
                    if (symbol['st_info']['type'] != 'STT_FUNC'
                            or symbol['st_shndx'] == 'SHN_UNDEF'):
                        continue

                    dump.write('longfunc:' + symbol.name + '\t' +
                               hex(symbol['st_value']) + '\n' + '\t' +
                               repr(symbol['st_other']['visibility']) + '\t' +
                               repr(symbol['st_info']['type']) + '\t' +
                               repr(symbol['st_shndx']) + '\t' +
                               repr(symbol['st_size']) + '\n')
                    function_list[symbol['st_value']] = {
                        'name': symbol.name,
                        'sz': symbol['st_size'],
                        'visibility': symbol['st_other']['visibility'],
                        'bind': symbol['st_info']['bind'],
                    }
        return function_list

    def flist_from_symtab1(self):
        """Collect every STT_FUNC symbol, including undefined ones.

        Unlike ``flist_from_symtab`` this variant does not filter on
        SHN_UNDEF, and it writes a debug record to ``/tmp/funcs.txt`` for
        every symbol of every symbol table, not only for functions.

        Returns a dict mapping the symbol's ``st_value`` to a dict with the
        keys 'name', 'sz', 'visibility' and 'bind'. When several symbols share
        an ``st_value`` the last one seen wins.
        """
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]

        function_list = dict()
        # `with` guarantees the dump file is closed even when reading a symbol
        # raises half-way through.
        with open('/tmp/funcs.txt', 'w') as dump:
            for section in symbol_tables:
                if section['sh_entsize'] == 0:
                    continue

                for symbol in section.iter_symbols():
                    dump.write('longfunc:' + symbol.name + '\t' +
                               hex(symbol['st_value']) + '\n' + '\t' +
                               repr(symbol['st_other']['visibility']) + '\t' +
                               repr(symbol['st_info']['type']) + '\t' +
                               repr(symbol['st_shndx']) + '\t' +
                               repr(symbol['st_size']) + '\n')

                    if symbol['st_info']['type'] == 'STT_FUNC':
                        function_list[symbol['st_value']] = {
                            'name': symbol.name,
                            'sz': symbol['st_size'],
                            'visibility': symbol['st_other']['visibility'],
                            'bind': symbol['st_info']['bind'],
                        }
        return function_list

    def slist_from_symtab(self):
        sections = dict()
        for section in self.elffile.iter_sections():
            sections[section.name] = {
                'base': section['sh_addr'],
                'sz': section['sh_size'],
                'offset': section['sh_offset'],
                'align': section['sh_addralign'],
            }

        return sections

    def load_globals_from_glist(self, glist):
        self.container.add_globals(glist)

    def global_data_list_from_symtab(self):
        """Collect the defined data-object symbols from every symbol table.

        A symbol is kept when its type is STT_OBJECT, its section index is not
        SHN_UNDEF, its size is non-zero, its visibility is not STV_HIDDEN and
        its name does not contain "@@GLIBC". A debug record per kept symbol is
        written to ``/tmp/data.txt``.

        Returns a defaultdict(list) mapping ``st_value`` to a list of dicts
        with the keys 'name' (the symbol name suffixed with ``_<st_value in
        hex>``) and 'sz'.
        """
        symbol_tables = [
            sec for sec in self.elffile.iter_sections()
            if isinstance(sec, SymbolTableSection)
        ]  # two symbol_table .dynsym and .symtab

        global_list = defaultdict(list)
        # `with` guarantees the dump file is closed even when reading a symbol
        # raises half-way through.
        with open('/tmp/data.txt', 'w') as dump:
            for section in symbol_tables:
                if section['sh_entsize'] == 0:
                    continue

                for symbol in section.iter_symbols():
                    # XXX: HACK
                    if "@@GLIBC" in symbol.name:
                        continue
                    if symbol['st_other']['visibility'] == "STV_HIDDEN":
                        continue
                    if symbol['st_size'] == 0:
                        continue

                    if (symbol['st_info']['type'] != 'STT_OBJECT'
                            or symbol['st_shndx'] == 'SHN_UNDEF'):
                        continue

                    dump.write('name: ' + symbol.name + "\tsize: " +
                               repr(symbol['st_size']) + '\tvisual: ' +
                               symbol['st_other']['visibility'] + '\tbind: ' +
                               symbol['st_info']['bind'] + '\n')
                    global_list[symbol['st_value']].append({
                        'name':
                        "{}_{:x}".format(symbol.name, symbol['st_value']),
                        'sz':
                        symbol['st_size'],
                    })
        return global_list
Esempio n. 55
0
 def __init__(self, elf_file):
     """Parse ``elf_file`` with ELFFile; the symbol table and DWARF info start unset."""
     self._elf = ELFFile(elf_file)
     self.symbol_table = self.dwarf_info = None
Esempio n. 56
0
    def test_build_wheel_depending_on_library_with_rpath(
            self, any_manylinux_container, docker_python, io_folder, dtag):
        """Build, repair and install a wheel whose extension needs a library with RPATH/RUNPATH.

        Checks performed:
        - the freshly built liba.so carries the dynamic tag selected by ``dtag``
        - after the repair, RUNPATH is replaced by RPATH in the bundled libraries
        - the RPATH location is correct, i.e. it is inside the .libs directory
          where all gathered libraries are put
        """
        policy, tag, manylinux_ctr = any_manylinux_container

        # Build the wheel inside the manylinux container
        build_script = (
            "cd /auditwheel_src/tests/integration/testrpath &&"
            "if [ -d ./build ]; then rm -rf ./build ./*.egg-info; fi && "
            f"DTAG={dtag} python setup.py bdist_wheel -d /io")
        docker_exec(manylinux_ctr, ["bash", "-c", build_script])

        liba_path = op.join(op.dirname(__file__), "testrpath", "a", "liba.so")
        with open(liba_path, "rb") as f:
            dynamic = ELFFile(f).get_section_by_name(".dynamic")
            tags = {t.entry.d_tag for t in dynamic.iter_tags()}
            assert f"DT_{dtag.upper()}" in tags

        filenames = os.listdir(io_folder)
        expected_wheel = f"testrpath-0.0.1-{PYTHON_ABI}-linux_{PLATFORM}.whl"
        assert filenames == [expected_wheel]
        orig_wheel = filenames[0]
        assert "manylinux" not in orig_wheel

        # Repair the wheel using the appropriate manylinux container
        repair_command = (
            f"auditwheel repair --plat {policy} --only-plat -w /io /io/{orig_wheel}"
        )
        library_path_prefix = (
            "LD_LIBRARY_PATH="
            "/auditwheel_src/tests/integration/testrpath/a:$LD_LIBRARY_PATH ")
        docker_exec(manylinux_ctr,
                    ["bash", "-c", library_path_prefix + repair_command])

        filenames = os.listdir(io_folder)
        assert len(filenames) == 2
        repaired_wheel = f"testrpath-0.0.1-{PYTHON_ABI}-{tag}.whl"
        assert repaired_wheel in filenames
        assert_show_output(manylinux_ctr, repaired_wheel, policy, False)

        # Install the repaired wheel and make sure the extension still works
        docker_exec(docker_python, "pip install /io/" + repaired_wheel)
        check_script = "from testrpath import testrpath; print(testrpath.func())"
        output = docker_exec(docker_python, ["python", "-c", check_script])
        assert output.strip() == "11"

        # Inspect the dynamic section of every library bundled into the wheel
        with zipfile.ZipFile(os.path.join(io_folder, repaired_wheel)) as w:
            bundled = [n for n in w.namelist() if "testrpath/.libs/lib" in n]
            for name in bundled:
                with w.open(name) as f:
                    elf = ELFFile(io.BytesIO(f.read()))
                    dynamic = elf.get_section_by_name(".dynamic")
                    runpath_tags = [
                        t for t in dynamic.iter_tags()
                        if t.entry.d_tag == "DT_RUNPATH"
                    ]
                    assert len(runpath_tags) == 0
                    if ".libs/liba" not in name:
                        continue
                    rpath_tags = [
                        t for t in dynamic.iter_tags()
                        if t.entry.d_tag == "DT_RPATH"
                    ]
                    assert len(rpath_tags) == 1
                    assert rpath_tags[0].rpath == "$ORIGIN/."
Esempio n. 57
0
class ReadElf(object):
    """ display_* methods are used to emit output into the output stream
    """
    def __init__(self, file, output):
        """ file:
                stream object holding the ELF file to parse

            output:
                stream that receives the emitted output
        """
        self.elffile = ELFFile(file)
        self.output = output

        # Both caches start empty; the DWARF info is lazily initialized if a
        # debug dump is requested.
        self._dwarfinfo = self._versioninfo = None

    def _section_from_spec(self, spec):
        """ Retrieve a section given a "spec" (either number or name).
            Return None if no such section exists in the file.
        """
        try:
            num = int(spec)
            if num < self.elffile.num_sections():
                return self.elffile.get_section(num)
            else:
                return None
        except ValueError:
            # Not a number. Must be a name then
            section = self.elffile.get_section_by_name(force_unicode(spec))
            if section is None:
                # No match with a unicode name.
                # Some versions of pyelftools (<= 0.23) store internal strings
                # as bytes. Try again with the name encoded as bytes.
                section = self.elffile.get_section_by_name(force_bytes(spec))
            return section

    def pretty_print_pmdinfo(self, pmdinfo):
        """Print one line per entry of ``pmdinfo["pci_ids"]`` using names from ``pcidb``.

        Each entry is indexed as (vendor, device, subvendor, subdevice); the
        names and IDs are resolved through the module-level ``pcidb``.
        """
        global pcidb

        for ids in pmdinfo["pci_ids"]:
            vendor = pcidb.find_vendor(ids[0])
            device = vendor.find_device(ids[1])
            subdev = device.find_subid(ids[2], ids[3])
            fields = (vendor.name, vendor.ID, device.name, device.ID,
                      subdev.name)
            print("%s (%s) : %s (%s) %s" % fields)

    def parse_pmd_info_string(self, mystring):
        """Decode and print the JSON payload of a PMD info string.

        The payload starts two characters after the first "=" in
        ``mystring``. When the module-level ``raw_output`` is set, the decoded
        JSON is re-dumped and nothing else is printed. Otherwise the PMD name,
        the optional fields and the supported PCI IDs are printed.
        """
        global raw_output
        global pcidb

        payload = mystring[mystring.index("=") + 2:]
        pmdinfo = json.loads(payload)

        if raw_output:
            print(json.dumps(pmdinfo))
            return

        # Fields that are printed only when present in the payload
        optional_pmd_info = [
            {'id': 'params', 'tag': 'PMD PARAMETERS'},
            {'id': 'kmod', 'tag': 'PMD KMOD DEPENDENCIES'},
        ]

        print("PMD NAME: " + pmdinfo["name"])
        for optional in optional_pmd_info:
            try:
                value = pmdinfo[optional['id']]
            except KeyError:
                continue
            print("%s: %s" % (optional['tag'], value))

        if len(pmdinfo["pci_ids"]) > 0:
            print("PMD HW SUPPORT:")
            if pcidb is None:
                # No PCI database available: fall back to raw hex IDs
                print("VENDOR\t DEVICE\t SUBVENDOR\t SUBDEVICE")
                for ids in pmdinfo["pci_ids"]:
                    print("0x%04x\t 0x%04x\t 0x%04x\t\t 0x%04x" %
                          (ids[0], ids[1], ids[2], ids[3]))
            else:
                self.pretty_print_pmdinfo(pmdinfo)

        print("")

    def display_pmd_info_strings(self, section_spec):
        """ Scan a section for NUL-terminated strings and report every one
            that contains "PMD_INFO_STRING". section_spec is either a section
            number or a name; nothing happens if the section does not exist.
        """
        section = self._section_from_spec(section_spec)
        if section is None:
            return

        data = section.data()
        size = len(data)
        pos = 0

        while pos < size:
            # Skip bytes outside the 32..127 range to find the start of a string
            while pos < size:
                value = byte2int(data[pos])
                if 32 <= value <= 127:
                    break
                pos += 1

            if pos >= size:
                break

            # The string runs up to (not including) the next NUL byte
            end = pos
            while end < size and byte2int(data[end]) != 0:
                end += 1

            # pyelftools may return byte-strings, force decode them
            text = force_unicode(data[pos:end])
            if text.find("PMD_INFO_STRING") != -1:
                self.parse_pmd_info_string(text)

            pos = end

    def find_librte_eal(self, section):
        """Return the first DT_NEEDED entry of ``section`` that contains "librte_eal", or None."""
        for tag in section.iter_tags():
            # pyelftools may return byte-strings, force decode them
            if force_unicode(tag.entry.d_tag) != 'DT_NEEDED':
                continue
            needed = force_unicode(tag.needed)
            if "librte_eal" in needed:
                return needed
        return None

    def search_for_autoload_path(self):
        """ Look for a "DPDK_PLUGIN_PATH=..." string in `.rodata`.

            If this binary needs a librte_eal library that can be located on
            DT_RUNPATH, LD_LIBRARY_PATH or the default library directories,
            that library is scanned; otherwise this binary itself is scanned.

            Returns (path, library), where path is the text after the first
            "=" of the matching string and library is the scanned library
            file (None when this binary was scanned). Returns (None, None)
            when nothing is found.
        """
        scanelf = self
        scanfile = None
        library = None

        section = self._section_from_spec(".dynamic")
        # The outer try/finally closes the library file on every exit path.
        # Previously it leaked when a plugin path was found, and the ELFError
        # handler called close() on None if the error came before the open.
        try:
            try:
                eallib = self.find_librte_eal(section)
                if eallib is not None:
                    ldlibpath = os.environ.get('LD_LIBRARY_PATH')
                    if ldlibpath is None:
                        ldlibpath = ""
                    dtr = self.get_dt_runpath(section)
                    library = search_file(
                        eallib,
                        dtr + ":" + ldlibpath +
                        ":/usr/lib64:/lib64:/usr/lib:/lib")
                    if library is None:
                        return (None, None)
                    if raw_output is False:
                        print("Scanning for autoload path in %s" % library)
                    scanfile = io.open(library, 'rb')
                    scanelf = ReadElf(scanfile, sys.stdout)
            except AttributeError:
                # Not a dynamic binary
                pass
            except ELFError:
                return (None, None)

            section = scanelf._section_from_spec(".rodata")
            if section is None:
                return (None, None)

            data = section.data()
            dataptr = 0

            while dataptr < len(data):
                # Skip bytes outside the 32..127 range to find a string start
                while (dataptr < len(data)
                       and not (32 <= byte2int(data[dataptr]) <= 127)):
                    dataptr += 1

                if dataptr >= len(data):
                    break

                # The string runs up to (not including) the next NUL byte
                endptr = dataptr
                while endptr < len(data) and byte2int(data[endptr]) != 0:
                    endptr += 1

                # pyelftools may return byte-strings, force decode them
                mystring = force_unicode(data[dataptr:endptr])
                rc = mystring.find("DPDK_PLUGIN_PATH")
                if (rc != -1):
                    rc = mystring.find("=")
                    return (mystring[rc + 1:], library)

                dataptr = endptr
            return (None, None)
        finally:
            if scanfile is not None:
                scanfile.close()

    def get_dt_runpath(self, dynsec):
        """Return the value of the first DT_RUNPATH tag in ``dynsec``, or "" if there is none."""
        for tag in dynsec.iter_tags():
            # pyelftools may return byte-strings, force decode them
            if force_unicode(tag.entry.d_tag) != 'DT_RUNPATH':
                continue
            return force_unicode(tag.runpath)
        return ""

    def process_dt_needed_entries(self):
        """ Walk the DT_NEEDED entries of this binary. Every needed library
            whose name contains "librte_" and that can be located is
            processed recursively and has the PMD info strings of its
            `.rodata` section displayed.
        """
        ldlibpath = os.environ.get('LD_LIBRARY_PATH')
        if ldlibpath is None:
            ldlibpath = ""

        dynsec = self._section_from_spec(".dynamic")
        try:
            runpath = self.get_dt_runpath(dynsec)
        except AttributeError:
            # dynsec is None, just return
            return

        # Same search order for every library: runpath, environment, defaults
        search_path = (runpath + ":" + ldlibpath +
                       ":/usr/lib64:/lib64:/usr/lib:/lib")

        for tag in dynsec.iter_tags():
            # pyelftools may return byte-strings, force decode them
            if force_unicode(tag.entry.d_tag) != 'DT_NEEDED':
                continue
            needed = force_unicode(tag.needed)
            if 'librte_' not in needed:
                continue

            library = search_file(needed, search_path)
            if library is None:
                continue

            # The with-block closes the library file on every path
            with io.open(library, 'rb') as libstream:
                try:
                    libelf = ReadElf(libstream, sys.stdout)
                except ELFError:
                    print("%s is no an ELF file" % library)
                    continue
                libelf.process_dt_needed_entries()
                libelf.display_pmd_info_strings(".rodata")
Esempio n. 58
0
def ParseELF(path, root='/', ldpaths=None, _first=True, _all_libs=None):
  """Parse the ELF dependency tree of the specified file

  Args:
    path: The ELF to scan
    root: The root tree to prepend to paths; this applies to interp and rpaths
          only as |path| and |ldpaths| are expected to be prefixed already
    ldpaths: dict containing library paths to search; should have the keys:
             conf, env, interp.  Defaults to empty lists for all three.
    _first: Recursive use only; is this the first ELF ?
    _all_libs: Recursive use only; dict of all libs we've seen
  Returns:
    a dict containing information about all the ELFs; e.g.
    {
      'interp': '/lib64/ld-linux.so.2',
      'needed': ['libc.so.6', 'libcurl.so.4',],
      'libs': {
        'libc.so.6': {
          'path': '/lib64/libc.so.6',
          'needed': [],
        },
        'libcurl.so.4': {
          'path': '/usr/lib64/libcurl.so.4',
          'needed': ['libc.so.6', 'librt.so.1',],
        },
      },
    }
  """
  # None sentinels instead of mutable default arguments: a default dict is
  # created once at definition time and would be shared between calls.
  if ldpaths is None:
    ldpaths = {'conf': [], 'env': [], 'interp': []}
  if _first:
    _all_libs = {}
    # Work on a copy so the caller's dict does not gain interp/rpath/runpath keys
    ldpaths = ldpaths.copy()
  elif _all_libs is None:
    _all_libs = {}
  ret = {
    'interp': None,
    'path': path,
    'needed': [],
    'rpath': [],
    'runpath': [],
    'libs': _all_libs,
  }

  with open(path, 'rb') as f:
    elf = ELFFile(f)

    # If this is the first ELF, extract the interpreter.
    if _first:
      for segment in elf.iter_segments():
        if segment.header.p_type != 'PT_INTERP':
          continue

        interp = bstr(segment.get_interp_name())
        ret['interp'] = normpath(root + interp)
        ret['libs'][os.path.basename(interp)] = {
          'path': ret['interp'],
          'needed': [],
        }
        # XXX: Should read it and scan for /lib paths.
        ldpaths['interp'] = [
          normpath(root + os.path.dirname(interp)),
          normpath(root + '/usr' + os.path.dirname(interp)),
        ]
        break

    # Parse the ELF's dynamic tags.
    libs = []
    rpaths = []
    runpaths = []
    for segment in elf.iter_segments():
      if segment.header.p_type != 'PT_DYNAMIC':
        continue

      for t in segment.iter_tags():
        if t.entry.d_tag == 'DT_RPATH':
          rpaths = ParseLdPaths(bstr(t.rpath), root=root, path=path)
        elif t.entry.d_tag == 'DT_RUNPATH':
          runpaths = ParseLdPaths(bstr(t.runpath), root=root, path=path)
        elif t.entry.d_tag == 'DT_NEEDED':
          libs.append(bstr(t.needed))
      if runpaths:
        # If both RPATH and RUNPATH are set, only the latter is used.
        rpaths = []

      # XXX: We assume there is only one PT_DYNAMIC.  This is
      # probably fine since the runtime ldso does the same.
      break
    if _first:
      # Propagate the rpaths used by the main ELF since those will be
      # used at runtime to locate things.
      ldpaths['rpath'] = rpaths
      ldpaths['runpath'] = runpaths
    ret['rpath'] = rpaths
    ret['runpath'] = runpaths
    ret['needed'] = libs

    # Search for the libs this ELF uses.
    all_ldpaths = None
    for lib in libs:
      if lib in _all_libs:
        continue
      if all_ldpaths is None:
        # Built lazily, only when at least one unseen lib has to be located
        all_ldpaths = rpaths + ldpaths['rpath'] + ldpaths['env'] + runpaths + ldpaths['runpath'] + ldpaths['conf'] + ldpaths['interp']
      fullpath = FindLib(elf, lib, all_ldpaths)
      # Register the lib before recursing so dependency cycles terminate
      _all_libs[lib] = {
        'path': fullpath,
        'needed': [],
      }
      if fullpath:
        lret = ParseELF(fullpath, root, ldpaths, False, _all_libs)
        _all_libs[lib]['needed'] = lret['needed']

  return ret
Esempio n. 59
0
def process_file(filename, symbolToFind):
    """Write the addresses of the dmtcp wrappers of a symbol to an .addr file.

    Every symbol table of the ELF file ``filename`` is scanned for symbols
    whose name contains ``symbolToFind``.  Symbols named
    ``<symbolToFind>_wrap__dmtcp_<digits>`` are kept and sorted, and the
    symbol named exactly ``symbolToFind`` is appended after them.  Only the
    formatted addresses are kept; a ``'0'`` placeholder is then inserted at
    the index of every line of ``all_library_wrappers_dmtcp.nz`` that does
    not mention ``symbolToFind``.  The resulting list is handed to
    ``write_to_file`` with the path ``<cwd>/addrs/<symbolToFind>__dmtcp.addr``.

    Args:
        filename: Path of the ELF file to inspect.
        symbolToFind: Name of the wrapped symbol (text, not bytes).

    Raises:
        SystemExit: With status 1 when no wrapper symbol is found, or when
            the ELF has no symbol named exactly ``symbolToFind``.
    """
    actualFunc = ""
    listToWrite = []
    print('Processing file: ', filename)
    print('\tfor symbol: ', symbolToFind)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        for section in elffile.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                print('No Symbol Table!!!')
                continue
            for symbol in section.iter_symbols():
                # Decode once and match on the decoded text; matching on
                # str(bytes) would test against the "b'...'" repr instead.
                name = symbol.name.decode("utf-8")
                if symbolToFind not in name:
                    continue
                string = '%s %s' % (name, format_hex(symbol['st_value']))
                if symbolToFind == name:
                    actualFunc = string
                listToWrite.append(string)

    # Regular expression to match only dmtcp wrappers.  The symbol name is
    # escaped so it is always matched literally.
    wrapper_re = re.compile(re.escape(symbolToFind) + r'_wrap__dmtcp_\d+')
    listToWrite = [x for x in listToWrite if wrapper_re.search(x) is not None]

    # If no symbolToFind... Exit
    if not listToWrite:
        print("*** *** No symbols found for: " + symbolToFind + "__dmtcp_*")
        sys.stdout.flush()
        sys.exit(1)

    # Without the wrapped function itself there is no address to append
    # below; fail with a message instead of an IndexError further down.
    if not actualFunc:
        print("*** *** Symbol not found: " + symbolToFind)
        sys.stdout.flush()
        sys.exit(1)

    # Sort list of strings of address and symbol name
    listToWrite.sort()
    listToWrite += [actualFunc]
    print(listToWrite)

    # Remove the name portion since sorted
    listToWrite = [x.split(' ', 1)[1] for x in listToWrite]

    # Put a 0 (NULL) hole in for every library that doesn't have the symbol
    with open('all_library_wrappers_dmtcp.nz', 'r') as f:
        l_lines = f.readlines()
        print('****************', l_lines)
        for ind, line in enumerate(l_lines):
            print(symbolToFind, ind, line, '****************')
            if symbolToFind not in line:
                listToWrite.insert(ind, '0')

    # Create the output directory; exist_ok avoids the check-then-create race
    os.makedirs('addrs', exist_ok=True)
    cwd = os.getcwd() + r'/addrs/'

    # Open file, filename will be: symbolToFind + '__dmtcp.addr'
    fileToWrite = cwd + symbolToFind + r'__dmtcp.addr'
    write_to_file(fileToWrite, listToWrite)
    print(listToWrite)
    print("\tThe addresses directly above has been written out to : " +
          fileToWrite + "\n\n")
Esempio n. 60
0
def main():
    """Pack selected sections of an ELF file into a flat binary plus a map.

    Arguments come from the module-level ``parser``: ``input`` (ELF file to
    read), ``output`` (binary file to write) and ``map`` (text file to
    write).  Every section whose name starts with ``.text``, ``.rodata``,
    ``.data`` or ``.got`` is written to the output in section-table order,
    each padded with zero bytes to a multiple of 4 bytes.  For each such
    section one line is written to the map file: name, ``sh_addr``, offset
    in the output file and unpadded size (the last three in hex), in
    columns sized to the widest value.

    Returns:
        0, always.
    """
    # take an elf file and pack all of the sections together that are required
    params = parser.parse_args()

    # if a section starts with one of these then we will rip it out of the file
    ValidHeaders = (".text", ".rodata", ".data", ".got")
    DataLen = 0

    # The context managers close all three files even if parsing or writing
    # raises part-way through.
    with open(params.input, "rb") as f, \
            open(params.output, "wb") as o, \
            open(params.map, "w") as m:
        elf = ELFFile(f)

        # Select the sections once; both the width calculation and the
        # writing loop work on this list.
        Sections = []
        for SectionID in range(0, elf.num_sections()):
            CurSection = elf.get_section(SectionID)
            if CurSection.name.startswith(ValidHeaders):
                Sections.append(CurSection)

        # get maximum length for the fields ("or [0]" covers no sections)
        NameWidth = max([len(s.name) for s in Sections] or [0])
        AddrWidth = max([len("%x" % s.header.sh_addr) for s in Sections] or [0])
        SizeWidth = max([len("%x" % s.data_size) for s in Sections] or [0])
        # The offset column is sized from the sum of the unpadded sizes.
        DataWidth = len("%x" % sum(s.data_size for s in Sections))

        # The column widths are fixed now, so build the line format once.
        LineFormat = "%-{}s %{}x %{}x %{}x\n".format(
            NameWidth + 1, AddrWidth + 1, DataWidth + 1, SizeWidth + 1)

        # write all sections out
        for CurSection in Sections:
            DataStart = DataLen
            o.write(CurSection.data())

            # add padding if we are not 4 byte aligned to allow easier disassembly
            Padding = (4 - CurSection.data_size % 4) % 4
            if Padding:
                # bytes literal: the output file is opened in binary mode
                o.write(b"\x00" * Padding)

            DataLen += CurSection.data_size + Padding
            m.write(LineFormat % (CurSection.name, CurSection.header.sh_addr,
                                  DataStart, CurSection.data_size))

    print("Wrote %d bytes to %s" % (DataLen, params.output))
    return 0