예제 #1
0
def modify_object_macholib(cur_path, paths_to_paths):
    """
    Rewrite the Mach-O load commands of ``cur_path`` in place.

    This function is used when installing Mach-O buildcaches on Linux: it
    rewrites the loader commands for dependency library paths of Mach-O
    binaries and the id path of Mach-O libraries.
    Rewriting of rpaths is handled by replace_prefix_bin.

    Args:
        cur_path: path of the Mach-O binary to be modified.
        paths_to_paths: dictionary mapping paths in the old install layout
            to paths in the new install layout.

    Returns:
        None. Failures while writing the file back are ignored.
    """
    dll = MachO(cur_path)

    # The callback yields the mapped path, or None for paths that are not
    # keys of the mapping.
    dll.rewriteLoadCommands(paths_to_paths.get)

    try:
        # The context manager closes the handle even when a write fails;
        # the previous explicit close() was skipped on any exception.
        with open(dll.filename, 'rb+') as f:
            for _ in dll.headers:
                f.seek(0)
                dll.write(f)
            f.seek(0, 2)
            f.flush()
    except Exception:
        # Deliberately best-effort: a binary that cannot be rewritten is
        # left as it is rather than aborting the caller.
        pass
예제 #2
0
def rewriteFramework(framework, frameworkMap):
    """Rewrite the load commands of a framework's binary in place.

    The binary is looked up inside ``framework`` under the framework's own
    base name with the extension removed (``<framework>/<basename>``).

    Args:
        framework: path of the framework directory.
        frameworkMap: mapping from a library file name to the load path
            that should replace it; ``.get`` is used, so names that are
            not present produce ``None``.
    """
    basename = os.path.splitext(os.path.basename(framework))[0]
    dyld = os.path.abspath(os.path.join(framework, basename))

    macho = MachO(dyld)

    def changefunc(key):
        # An entry naming the binary itself keeps its current path.
        if key == dyld:
            return dyld

        # Every other entry is looked up by file name only.
        return frameworkMap.get(os.path.basename(key))

    macho.rewriteLoadCommands(changefunc)

    # Close the handle deterministically; the original passed an anonymous
    # open() result to write() and never closed it.
    with open(dyld, "rb+") as fp:
        macho.write(fp)
예제 #3
0
def rewriteFramework(framework, frameworkMap):
    """Rewrite the load commands of a framework's binary in place.

    The binary is looked up inside ``framework`` under the framework's own
    base name with the extension removed (``<framework>/<basename>``).

    Args:
        framework: path of the framework directory.
        frameworkMap: mapping from a library file name to the load path
            that should replace it; ``.get`` is used, so names that are
            not present produce ``None``.
    """
    basename = os.path.splitext(os.path.basename(framework))[0]
    dyld = os.path.abspath(os.path.join(framework, basename))

    macho = MachO(dyld)

    def changefunc(key):
        # An entry naming the binary itself keeps its current path.
        if key == dyld:
            return dyld

        # Every other entry is looked up by file name only.
        return frameworkMap.get(os.path.basename(key))

    macho.rewriteLoadCommands(changefunc)

    # Close the handle deterministically; the original passed an anonymous
    # open() result to write() and never closed it.
    with open(dyld, "rb+") as fp:
        macho.write(fp)
예제 #4
0
def set_macos_sdk_version(filename, major, minor, revision):
    """
    Replace the macOS SDK version recorded in a binary with the given one.

    The three components are packed as ``major << 16 | minor << 8 | revision``
    and stored in the ``sdk`` field of the version command located by
    ``_find_version_cmd``. The file is rewritten in place.

    NOTE: currently, only the version in the first arch slice is modified.
    """
    # Each component has to fit in the 0..255 range.
    assert major >= 0 and major <= 255, "Invalid major version value!"
    assert minor >= 0 and minor <= 255, "Invalid minor version value!"
    assert revision >= 0 and revision <= 255, "Invalid revision value!"

    # Parse the binary and pick its first slice.
    macho = MachO(filename)
    first_slice = macho.headers[0]

    # Locate the version command through the helper and patch its SDK field.
    command = _find_version_cmd(first_slice)
    packed_version = (major << 16) | (minor << 8) | revision
    command[1].sdk = packed_version

    # Persist the modified headers.
    with open(macho.filename, 'rb+') as out:
        macho.write(out)
예제 #5
0
def fix_exe_for_code_signing(filename):
    """
    Fix the Mach-O headers to make code signing possible.

    Code signing on OS X does not work out of the box with embedding
    .pkg archive into the executable.

    The fix is done this way:
    - Make the embedded .pkg archive part of the Mach-O 'String Table'.
      'String Table' is at end of the OS X exe file so just change the size
      of the table to cover the end of the file.
    - Fix the size of the __LINKEDIT segment.

    Only the first architecture slice is touched, and the file is rewritten
    in place.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html

    :param filename: path of the executable to patch.
    """
    exe_data = MachO(filename)
    # '0' - Exe contains only one architecture.
    first_header = exe_data.headers[0]
    # Every load command is a tuple: (cmd_metadata, segment, [section1, section2])
    cmds = first_header.commands
    file_size = first_header.size

    ## Make the embedded .pkg archive part of the Mach-O 'String Table'.
    # Data about 'String Table' is in LC_SYMTAB load command.
    for c in cmds:
        if c[0].get_cmd_name() == 'LC_SYMTAB':
            data = c[1]
            # Increase the size of 'String Table' to cover the embedded .pkg file.
            data.strsize = file_size - data.stroff

    ## Fix the size of the __LINKEDIT segment.
    # __LINKEDIT segment data is the 4th item in the executable.
    # NOTE(review): the fixed index is an assumption about the layout of the
    # binary; confirm it holds for every executable passed in.
    linkedit = cmds[3][1]
    new_segsize = file_size - linkedit.fileoff
    linkedit.filesize = new_segsize
    linkedit.vmsize = new_segsize

    ## Write changes back.
    # The context manager closes the file even if writing fails.
    with open(exe_data.filename, 'rb+') as fp:
        exe_data.write(fp)
예제 #6
0
파일: osx.py 프로젝트: ChaiZQ/pyinstaller
def fix_exe_for_code_signing(filename):
    """
    Fix the Mach-O headers to make code signing possible.

    Code signing on OS X does not work out of the box with embedding
    .pkg archive into the executable.

    The fix is done this way:
    - Make the embedded .pkg archive part of the Mach-O 'String Table'.
      'String Table' is at end of the OS X exe file so just change the size
      of the table to cover the end of the file.
    - Fix the size of the __LINKEDIT segment.

    Only the first architecture slice is touched, and the file is rewritten
    in place.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html

    :param filename: path of the executable to patch.
    """
    exe_data = MachO(filename)
    # '0' - Exe contains only one architecture.
    first_header = exe_data.headers[0]
    # Every load command is a tuple: (cmd_metadata, segment, [section1, section2])
    cmds = first_header.commands
    file_size = first_header.size

    ## Make the embedded .pkg archive part of the Mach-O 'String Table'.
    # Data about 'String Table' is in LC_SYMTAB load command.
    for c in cmds:
        if c[0].get_cmd_name() == 'LC_SYMTAB':
            data = c[1]
            # Increase the size of 'String Table' to cover the embedded .pkg file.
            data.strsize = file_size - data.stroff

    ## Fix the size of the __LINKEDIT segment.
    # __LINKEDIT segment data is the 4th item in the executable.
    # NOTE(review): the fixed index is an assumption about the layout of the
    # binary; confirm it holds for every executable passed in.
    linkedit = cmds[3][1]
    new_segsize = file_size - linkedit.fileoff
    linkedit.filesize = new_segsize
    linkedit.vmsize = new_segsize

    ## Write changes back.
    # The context manager closes the file even if writing fails.
    with open(exe_data.filename, 'rb+') as fp:
        exe_data.write(fp)
예제 #7
0
def modify_object_macholib(cur_path, old_dir, new_dir):
    """
    Modify the Mach-O binary ``cur_path`` by replacing old_dir with new_dir
    in its load commands, using py-macholib.

    Every path handed to the rewrite callback has each occurrence of
    ``old_dir`` replaced by ``new_dir``. The original description lists
    LC_ID_DYLIB, LC_LOAD_DYLIB and LC_RPATH as the affected commands.

    Files whose name ends in '.o' are skipped. Failures while writing the
    file back are ignored.

    Raises:
        MissingMacholibException: if macholib cannot be imported.
    """
    if cur_path.endswith('.o'):
        return
    try:
        from macholib.MachO import MachO
    except ImportError as e:
        raise MissingMacholibException(e)

    def match_func(cpath):
        return cpath.replace(old_dir, new_dir)

    dll = MachO(cur_path)
    dll.rewriteLoadCommands(match_func)
    try:
        # The context manager closes the handle even when a write fails;
        # the previous explicit close() was skipped on any exception.
        with open(dll.filename, 'rb+') as f:
            for _ in dll.headers:
                f.seek(0)
                dll.write(f)
            f.seek(0, 2)
            f.flush()
    except Exception:
        # Deliberately best-effort: a binary that cannot be rewritten is
        # left as it is rather than aborting the caller.
        pass
예제 #8
0
def mac_set_relative_dylib_deps(libname, distname):
    """
    Point the dynamic library dependencies of `libname` at relative paths
    (Mac OS X).

    With relative paths there is no need for the DYLD_LIBRARY_PATH
    environment variable, which has known issues; relative paths are the
    more flexible mechanism.

    The location of dependent libraries is expressed relative to the
    library being processed, i.e. the rewritten paths start with
    '@loader_path'.

    'distname'  path of the library relative to the dist directory of the
                frozen executable. It determines how many directory levels
                have to be climbed from @loader_path for binaries that do
                not sit directly in the dist directory.

                E.g. qt4 plugins are not in the same directory as Qt*.dylib
                files. Without '@loader_path/../..' for qt plugins Mac OS X
                could not resolve their shared library dependencies and the
                plugins would not load.
    """

    from macholib import util
    from macholib.MachO import MachO

    # The bootloader is skipped, otherwise PyInstaller fails with an
    # exception like 'ValueError: total_size > low_offset (288 > 0)'.
    if os.path.basename(libname) in _BOOTLOADER_FNAMES:
        return

    # Build the '../' prefix leading from the library's directory back to
    # the directory holding the shared libraries.
    # E.g.  ./qt4_plugins/images/ -> ./../../
    dist_subdir = os.path.dirname(distname)
    if dist_subdir:
        depth = len(dist_subdir.split(os.sep))
        climb = (os.pardir + os.sep) * depth
    else:
        # distname is a bare file name: nothing to climb.
        climb = ''

    def match_func(pth):
        """
        Return the replacement for `pth`, or None to leave it unchanged.
        """
        # System libraries keep their absolute path.
        if util.in_system_path(pth):
            return None

        # Older python.org builds that use the system Tcl/Tk framework have
        # _tkinter.cpython-*-darwin.so linked against
        # /Library/Frameworks/Tcl.framework/Versions/8.5/Tcl and
        # /Library/Frameworks/Tk.framework/Versions/8.5/Tk, while the real
        # frameworks live in /System/Library/Frameworks. Such paths are not
        # caught by in_system_path() and are exempted here by hand.
        for marker in ('/Library/Frameworks/Tcl.framework/',
                       '/Library/Frameworks/Tk.framework/'):
            if marker in pth:
                return None

        # Everything else becomes relative to the loading binary.
        relocated = os.path.join('@loader_path', climb, os.path.basename(pth))
        return relocated

    # Rewrite the mach headers with @loader_path.
    macho = MachO(libname)
    macho.rewriteLoadCommands(match_func)

    # Store the result; the write sequence follows the macholib example.
    try:
        with open(macho.filename, 'rb+') as out:
            for _ in macho.headers:
                out.seek(0)
                macho.write(out)
            out.seek(0, os.SEEK_END)
            out.flush()
    except Exception:
        pass
예제 #9
0
class CMachoFuzzer:
    """Mutate random header and load-command fields of a Mach-O file.

    ``fuzz()`` loads a file with ``MachO``, applies a random number of
    field mutations to the parsed structures, writes the result to a new
    file and records the applied mutations in ``<output>.diff``.
    """

    def __init__(self):
        self.macho = None
        self.fuzz_properties = ["headers"]
        self.fuzz_sub_properties = {
            "headers": ["header", "commands", "headers"]
        }

        # One list of description strings per recorded mutation.
        self.changes = []
        # Names of the header-field mutations already applied; prevents
        # repeating one and is what fuzz() counts towards its target.
        self.change_list = []

    def do_fuzz_headers(self):
        """Try to apply one random mutation to a random header.

        A description is pushed onto ``self.changes`` up front and removed
        again on every path that gives up without mutating.
        """
        # Select a random header
        header = random.choice(self.macho.headers)
        idx = self.macho.headers.index(header)
        change = ["Header %d" % idx]
        self.changes.append(change)
        prop = random.choice(self.fuzz_sub_properties["headers"])

        if prop == "header":
            fields = random.choice(header.header._fields_)
            field = fields[0]

            change_name = "header %d field %s" % (idx, field)
            if change_name in self.change_list or field in BANNED_FIELDS:
                self.changes.pop()
                return

            change.append("Field %s" % field)
            # setattr replaces exec() of a formatted assignment; int()
            # mirrors the '%d' conversion the formatted string applied.
            setattr(header.header, field, int(get_random_value(fields[1])))

            self.change_list.append(change_name)
        elif prop == "commands":
            cmd = random.choice(header.commands)
            change.append("Command %d" % header.commands.index(cmd))

            subcmd = cmd[random.randint(0, len(cmd) - 1)]

            if hasattr(subcmd, '_fields_'):
                if len(subcmd._fields_) > 0:
                    fields = random.choice(subcmd._fields_)
                    field = fields[0]
                    change.append("Field %s" % field)
                    # getattr replaces eval() of a built attribute access.
                    str_type = str(type(getattr(subcmd, field)))
                    if str_type in SUPPORTED_FIELD_TYPES:
                        setattr(subcmd, field, get_random_value(fields[1]))
                    else:
                        # Unsupported field type: nothing was changed.
                        self.changes.pop()
                else:
                    print("Ignoring empty subcmd %s" % (subcmd,))
                    self.changes.pop()
            elif type(subcmd) is str:
                # String sub-commands are not supported: nothing changed.
                self.changes.pop()
            else:
                print("%s %s" % (type(subcmd), subcmd))
                # Exact type checks are kept on purpose so that subclasses
                # are treated the same way as before.
                if type(subcmd) is list and len(subcmd) > 0:
                    element = random.choice(subcmd)
                    change.append("List element %d" % subcmd.index(element))

                    fields = random.choice(element._fields_)
                    field_name = fields[0]
                    change.append("Field %s" % field_name)

                    setattr(element, field_name, get_random_value(fields[1]))
                else:
                    self.changes.pop()
        else:
            # "headers" is not supported yet, and any other value is
            # unknown: either way no mutation was applied.
            self.changes.pop()

    def do_fuzz_internal(self):
        """Dispatch one fuzzing step on a randomly chosen top-level property.

        Raises:
            AssertionError: if no file has been loaded into ``self.macho``.
            Exception: if the chosen property has no handler.
        """
        assert (self.macho is not None)

        element = random.choice(self.fuzz_properties)
        if element == "headers":
            self.do_fuzz_headers()
        else:
            raise Exception("Unknown element to fuzz %s" % repr(element))

    def fuzz(self, filename, output_filename):
        """Write a mutated copy of ``filename`` to ``output_filename``.

        Also writes ``output_filename + ".diff"`` listing the mutations,
        prints them, and finally runs ``radiff2`` on both files.
        """
        import subprocess

        self.macho = MachO(filename)

        changes = random.randint(1, 25)
        # Many attempts end without a counted change, so allow five
        # attempts per requested change.
        for _ in range(changes * 5):
            self.do_fuzz_internal()
            if len(self.change_list) == changes:
                break

        # Copy the contents of the original file to the output file. The
        # input is read completely before the output is opened, so the
        # source handle is closed and is never truncated first.
        with open(filename, "rb") as src:
            original = src.read()
        with open(output_filename, "wb+") as dst:
            dst.write(original)

        # Update its contents
        with open(output_filename, "rb+") as dst:
            self.macho.write(dst)

        # And write the .diff file
        lines = ["# Original file created by 'MachO Mutator' was %s\n" %
                 filename]
        for change in self.changes:
            line = "# CHANGE: %s" % ", ".join(change)
            print(line)
            lines.append(line + "\n")
        report = "".join(lines)
        # The file is opened in binary mode; encode when the text is not
        # already a byte string.
        if not isinstance(report, bytes):
            report = report.encode("utf-8")
        with open(output_filename + ".diff", "wb") as diff:
            diff.write(report)

        # An argument list keeps paths with spaces or shell metacharacters
        # intact, unlike the previous shell command string.
        try:
            subprocess.call(["radiff2", filename, output_filename])
        except OSError as exc:
            print("Could not run radiff2: %s" % (exc,))
예제 #10
0
파일: dylib.py 프로젝트: L37sg0/key-robot
def mac_set_relative_dylib_deps(libname, distname):
    """
    Point the dynamic library dependencies of `libname` at relative paths
    (Mac OS X).

    With relative paths there is no need for the DYLD_LIBRARY_PATH
    environment variable, which has known issues; relative paths are the
    more flexible mechanism.

    The location of dependent libraries is expressed relative to the
    library being processed, i.e. the rewritten paths start with
    '@loader_path'.

    'distname'  path of the library relative to the dist directory of the
                frozen executable. It determines how many directory levels
                have to be climbed from @loader_path for binaries that do
                not sit directly in the dist directory.

                E.g. qt4 plugins are not in the same directory as Qt*.dylib
                files. Without '@loader_path/../..' for qt plugins Mac OS X
                could not resolve their shared library dependencies and the
                plugins would not load.
    """

    from macholib import util
    from macholib.MachO import MachO

    # The bootloader is skipped, otherwise PyInstaller fails with an
    # exception like 'ValueError: total_size > low_offset (288 > 0)'.
    if os.path.basename(libname) in _BOOTLOADER_FNAMES:
        return

    # Build the '../' prefix leading from the library's directory back to
    # the directory holding the shared libraries.
    # E.g.  ./qt4_plugins/images/ -> ./../../
    dist_subdir = os.path.dirname(distname)
    if dist_subdir:
        depth = len(dist_subdir.split(os.sep))
        climb = (os.pardir + os.sep) * depth
    else:
        # distname is a bare file name: nothing to climb.
        climb = ''

    def match_func(pth):
        """
        Return the replacement for `pth`; system libraries yield None and
        so keep their absolute path.
        """
        if util.in_system_path(pth):
            return None
        # Non-system libraries become relative to the loading binary.
        return os.path.join('@loader_path', climb, os.path.basename(pth))

    # Rewrite the mach headers with @loader_path.
    macho = MachO(libname)
    macho.rewriteLoadCommands(match_func)

    # Store the result; the write sequence follows the macholib example.
    try:
        with open(macho.filename, 'rb+') as out:
            for _ in macho.headers:
                out.seek(0)
                macho.write(out)
            out.seek(0, os.SEEK_END)
            out.flush()
    except Exception:
        pass
예제 #11
0
def fix_exe_for_code_signing(filename):
    """
    Patch the Mach-O headers of an executable so that it can be code-signed.

    Code signing on Mac OS does not work out of the box once a .pkg archive has been appended to the executable.
    Two things are adjusted:
    - The embedded .pkg archive is made part of the Mach-O 'String Table'. The table sits at the end of the Mac OS
      executable, so its declared size is stretched to reach the end of the file.
    - The size of the __LINKEDIT segment is updated to match.

    Note: this only works when the single-arch thin executable, or the last arch slice of a multi-arch fat
    executable, is unsigned, because LC_CODE_SIGNATURE comes after LC_SYMTAB and because changing the headers
    invalidates the signature. On modern arm64 macOS a signature is mandatory, so compilers emit a dummy one at build
    time; it has to be removed before this function is called.

    Mach-O format specification: http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html
    """
    # Size of the file as it is on disk now, i.e. with the appended data.
    total_size = os.path.getsize(filename)

    # Work on the last header: a thin binary has exactly one slice, a fat binary has several, and the one to modify
    # is the last because it is adjacent to the appended data.
    macho = MachO(filename)
    last_slice = macho.headers[-1]

    # Sanity check: the slice must not be signed (otherwise the signature's section comes last in the __LINKEDIT
    # segment).
    signatures = []
    for entry in last_slice.commands:
        if entry[0].cmd == LC_CODE_SIGNATURE:
            signatures.append(entry)
    assert not signatures, "Executable contains code signature!"

    # Locate the __LINKEDIT segment by its name (16-byte zero padded string).
    padded_name = b'__LINKEDIT\x00\x00\x00\x00\x00\x00'
    linkedit_matches = []
    for entry in last_slice.commands:
        if entry[0].cmd == LC_SEGMENT_64 and entry[1].segname == padded_name:
            linkedit_matches.append(entry)
    assert len(linkedit_matches) == 1, "Expected exactly one __LINKEDIT segment!"
    linkedit = linkedit_matches[0][1]  # the segment command entry

    # Locate the SYMTAB section.
    symtab_matches = []
    for entry in last_slice.commands:
        if entry[0].cmd == LC_SYMTAB:
            symtab_matches.append(entry)
    assert len(symtab_matches) == 1, "Expected exactly one SYMTAB section!"
    symtab = symtab_matches[0][1]  # the symtab command entry

    # The string table closes the SYMTAB section, which is itself the last section of the __LINKEDIT segment, so
    # the ends of both should coincide with the end of the file (this is the last or only arch slice).
    #
    # When codesign under Mac OS 10.13 strips a signature it may leave an invalid file whose declared __LINKEDIT
    # length (filesize) points past the end of the file, as reported in issue #6167.
    #
    # To be immune to that, the declared sizes are never read; they are recomputed as the distance from each start
    # offset to the end of the final file, where both MUST end anyway.
    symtab.strsize = total_size - symtab.stroff
    linkedit.filesize = total_size - linkedit.fileoff

    # The new vmsize is the file size rounded up to a whole number of pages.
    is_arm64 = _get_arch_string(last_slice.header).startswith('arm64')
    page = 0x4000 if is_arm64 else 0x1000
    linkedit.vmsize = math.ceil(linkedit.filesize / page) * page

    # NOTE: per the spec, segments must be aligned to page boundaries: 0x4000 (16 kB) on arm64, 0x1000 (4 kB)
    # elsewhere. Not rounding and padding the segment file size seems to be tolerated - perhaps because it is the
    # last one?

    # Store the modified headers.
    with open(filename, 'rb+') as out:
        macho.write(out)

    # Thin binaries are done at this point.
    if not macho.fat:
        return

    # For fat binaries the fat header has to be adjusted too. macholib as of version 1.14 does not support this,
    # so it is done by hand.
    from macholib.mach_o import (FAT_MAGIC, FAT_MAGIC_64, fat_arch, fat_arch64, fat_header)
    with open(filename, 'rb+') as out:
        # Follows the MachO.load_fat() implementation. The fat header's signature was already validated when the
        # file was loaded above.
        fat = fat_header.from_fileobj(out)
        if fat.magic == FAT_MAGIC:
            archs = [fat_arch.from_fileobj(out) for _ in range(fat.nfat_arch)]
        elif fat.magic == FAT_MAGIC_64:
            archs = [fat_arch64.from_fileobj(out) for _ in range(fat.nfat_arch)]
        # Stretch the last slice so that it reaches the end of the file.
        final_arch = archs[-1]
        final_arch.size = total_size - final_arch.offset
        # Write the fat header and every arch entry back.
        out.seek(0)
        fat.to_fileobj(out)
        for entry in archs:
            entry.to_fileobj(out)
예제 #12
0
class CMachoFuzzer:
  """Mutate random header and load-command fields of a Mach-O file.

  ``fuzz()`` loads a file with ``MachO``, applies a random number of field
  mutations to the parsed structures, writes the result to a new file and
  records the applied mutations in ``<output>.diff``.
  """

  def __init__(self):
    self.macho = None
    self.fuzz_properties = ["headers"]
    self.fuzz_sub_properties = {"headers":["header", "commands", "headers"]}

    # One list of description strings per recorded mutation.
    self.changes = []
    # Names of the header-field mutations already applied; prevents
    # repeating one and is what fuzz() counts towards its target.
    self.change_list = []

  def do_fuzz_headers(self):
    """Try to apply one random mutation to a random header.

    A description is pushed onto ``self.changes`` up front and removed
    again on every path that gives up without mutating.
    """
    # Select a random header
    header = random.choice(self.macho.headers)
    idx = self.macho.headers.index(header)
    change = ["Header %d" % idx]
    self.changes.append(change)
    prop = random.choice(self.fuzz_sub_properties["headers"])

    if prop == "header":
      fields = random.choice(header.header._fields_)
      field = fields[0]

      change_name = "header %d field %s" % (idx, field)
      if change_name in self.change_list or field in BANNED_FIELDS:
        self.changes.pop()
        return

      change.append("Field %s" % field)
      # setattr replaces exec() of a formatted assignment; int() mirrors
      # the '%d' conversion the formatted string applied.
      setattr(header.header, field, int(get_random_value(fields[1])))

      self.change_list.append(change_name)
    elif prop == "commands":
      cmd = random.choice(header.commands)
      change.append("Command %d" % header.commands.index(cmd))

      subcmd = cmd[random.randint(0, len(cmd)-1)]

      if hasattr(subcmd, '_fields_'):
        if len(subcmd._fields_) > 0:
          fields = random.choice(subcmd._fields_)
          field = fields[0]
          change.append("Field %s" % field)
          # getattr replaces eval() of a built attribute access.
          str_type = str(type(getattr(subcmd, field)))
          if str_type in SUPPORTED_FIELD_TYPES:
            setattr(subcmd, field, get_random_value(fields[1]))
          else:
            # Unsupported field type: nothing was changed.
            self.changes.pop()
        else:
          print("Ignoring empty subcmd %s" % (subcmd,))
          self.changes.pop()
      elif type(subcmd) is str:
        # String sub-commands are not supported: nothing changed.
        self.changes.pop()
      else:
        print("%s %s" % (type(subcmd), subcmd))
        # Exact type checks are kept on purpose so that subclasses are
        # treated the same way as before.
        if type(subcmd) is list and len(subcmd) > 0:
          element = random.choice(subcmd)
          change.append("List element %d" % subcmd.index(element))

          fields = random.choice(element._fields_)
          field_name = fields[0]
          change.append("Field %s" % field_name)

          setattr(element, field_name, get_random_value(fields[1]))
        else:
          self.changes.pop()
    else:
      # "headers" is not supported yet, and any other value is unknown:
      # either way no mutation was applied.
      self.changes.pop()

  def do_fuzz_internal(self):
    """Dispatch one fuzzing step on a randomly chosen top-level property.

    Raises:
      AssertionError: if no file has been loaded into ``self.macho``.
      Exception: if the chosen property has no handler.
    """
    assert(self.macho is not None)

    element = random.choice(self.fuzz_properties)
    if element == "headers":
      self.do_fuzz_headers()
    else:
      raise Exception("Unknown element to fuzz %s" % repr(element))

  def fuzz(self, filename, output_filename):
    """Write a mutated copy of ``filename`` to ``output_filename``.

    Also writes ``output_filename + ".diff"`` listing the mutations, prints
    them, and finally runs ``radiff2`` on both files.
    """
    import subprocess

    self.macho = MachO(filename)

    changes = random.randint(1, 25)
    # Many attempts end without a counted change, so allow five attempts
    # per requested change.
    for _ in range(changes*5):
      self.do_fuzz_internal()
      if len(self.change_list) == changes:
        break

    # Copy the contents of the original file to the output file. The input
    # is read completely before the output is opened, so the source handle
    # is closed and is never truncated first.
    with open(filename, "rb") as src:
      original = src.read()
    with open(output_filename, "wb+") as dst:
      dst.write(original)

    # Update its contents
    with open(output_filename, "rb+") as dst:
      self.macho.write(dst)

    # And write the .diff file
    lines = ["# Original file created by 'MachO Mutator' was %s\n" % filename]
    for change in self.changes:
      line = "# CHANGE: %s" % ", ".join(change)
      print(line)
      lines.append(line + "\n")
    report = "".join(lines)
    # The file is opened in binary mode; encode when the text is not
    # already a byte string.
    if not isinstance(report, bytes):
      report = report.encode("utf-8")
    with open(output_filename + ".diff", "wb") as diff:
      diff.write(report)

    # An argument list keeps paths with spaces or shell metacharacters
    # intact, unlike the previous shell command string.
    try:
      subprocess.call(["radiff2", filename, output_filename])
    except OSError as exc:
      print("Could not run radiff2: %s" % (exc,))
예제 #13
0
class HeaderMangler(object):
    """
    Adjusts the Mach-O header of an executable after functions have been
    removed from its ``__text`` section.

    Typical use, as far as this class shows: call remove_funcs(), which
    analyses the header on demand, accounts for every removed function,
    shifts the offsets stored in the load commands and finally writes the
    updated header to ``<exec_path>-debloated``.

    The file handles ``fh`` (input) and ``out_fh`` (output) opened by
    remove_funcs() are left open on the instance.
    NOTE(review): presumably the caller keeps using and eventually closes
    them -- confirm against the calling code.
    """

    def __init__(self, binary_class, arch, exec_path, strategy, verbose=False):
        """
        binary_class -- compared against CLASS_MACHO / CLASS_MACHO64
        arch         -- ARCH_ARM or ARCH_X86_64; anything else raises
        exec_path    -- path of the executable to process
        strategy     -- compared against STRATEGY_NOPS / STRATEGY_REMOVE
        verbose      -- print the size reduction when functions are removed
        """
        self.path = exec_path
        self.mangled_text_segment = False
        self.header_analyzed = False
        self.shrink_by = 0
        self.header_metadata = None
        # Set by analyze_header(); initialised here so the attribute always
        # exists.
        self.macho = None

        self.strategy = strategy
        self.verbose = verbose

        # Name, address and size of the function handled most recently by
        # remove_func().
        self.fname = None
        self.faddr = 0
        self.fsize = 0
        self.arch = arch
        self.binary_class = binary_class

        self.total_shrink = 0

        self.entry_func = []

        self.total_fn_size = 0
        self.zero_pad = 0
        self.text_pad = 0

        self.TEXT_orig = None
        self.fh = None
        self.out_fh = None

        if arch == ARCH_ARM:
            self.code_mangler = ARM_Mangler()
        elif arch == ARCH_X86_64:
            self.code_mangler = X86_64_Mangler()
        else:
            # str() keeps the message intact when arch is not a string.
            raise Exception("Unknown arch: " + str(arch))

    def analyze_header(self):
        """Parse the executable and load its header tables."""
        self.macho = MachO(self.path)
        self.header_metadata = HeaderMetadata(self.macho, self.binary_class)
        self.header_metadata.read_tables()
        self.header_analyzed = True

    def write_header(self, fh, total_shrink):
        """
        Reduce the size field of the first header by `total_shrink` and
        write the Mach-O headers to the file object `fh`.
        """
        h = self.macho.headers[0]
        h.size -= total_shrink
        self.macho.write(fh)

    def adjust_nl_pointers(self, nldata, shirnk):
        """
        Return `nldata` unchanged.

        The second parameter is ignored; its misspelled name is kept so
        that existing keyword callers keep working.
        """
        return nldata

    def adjust_la_pointers(self, ladata, shrink):
        """
        Return a copy of `ladata` in which every pointer entry has been
        reduced by `shrink`.

        Entries are decoded as la_ptr_64 when binary_class is
        CLASS_MACHO64 and as la_ptr_32 otherwise.
        """
        size = LA_PTR_SIZE_32 if self.binary_class == CLASS_MACHO else LA_PTR_SIZE_64

        # The entry layout does not change while iterating, so pick the
        # decoder, the swap helpers and the stride once.
        if self.binary_class == CLASS_MACHO64:
            ptr_type = la_ptr_64
            swap_in, swap_out = big_swap_u64, little_swap_u64
            step = LA_PTR_SIZE_64
        else:
            ptr_type = la_ptr_32
            swap_in, swap_out = big_swap_u32, little_swap_u32
            step = LA_PTR_SIZE_32

        off = 0
        new_ladata = ""
        while off < len(ladata):
            lptr = ptr_type.from_str(ladata[off:(off + size)])
            lptr.ptr = swap_in(lptr.ptr)
            lptr.ptr -= shrink
            lptr.ptr = swap_out(lptr.ptr)
            new_ladata += lptr.to_str()
            off += step

        return new_ladata

    def read_TEXT_segment(self, in_fh):
        """
        Read ``TEXTsize - textoffset`` bytes starting at ``textoffset``
        from `in_fh` and keep them in ``self.TEXT_orig``.
        """
        in_fh.seek(self.header_metadata.textoffset)
        self.TEXT_orig = in_fh.read(self.header_metadata.TEXTsize -
                                    self.header_metadata.textoffset)

    def adjust_symtable(self, fname):
        """
        Return the current number of symbol table entries.

        Symbol pruning is disabled: this method used to return before its
        pruning loop, so that unreachable loop has been removed. The
        symbol table is left untouched and `fname` is not used.
        """
        return len(self.header_metadata.nlists)

    def adjust_function_starts(self, removed_funcs, all_funcs):
        """
        Replace the function-starts data with a zero-filled placeholder of
        the same length.

        The table is still walked to collect the entries that do not
        belong to `removed_funcs`, but writing the rebuilt table is not
        implemented yet (see the TODO below).
        """
        funcs = self.header_metadata.read_func_starts()
        table = funcs[FUNCS_START_BA]
        x = decode_uleb128(table, 0)
        funcs_ba = bytearray()
        addr = x[ULEB_DATA]
        copied_upto = 0

        while x[ULEB_OFF] != funcs[FUNCS_START_SIZE]:
            found = any(all_funcs[func][ADDR] == addr
                        for func in removed_funcs)
            old_offset = x[ULEB_OFF]
            x = decode_uleb128(table, x[ULEB_OFF])
            addr += x[ULEB_DATA]

            if not found:
                funcs_ba += table[copied_upto:old_offset]
                copied_upto = old_offset

        # TODO calculate the function starts and write them
        # Until then emit a zero-filled buffer of the original length.
        # bytearray(n) yields n zero bytes on Python 2 and 3 alike, whereas
        # repeatedly appending bytes(0) produced ASCII '0' characters on
        # Python 2 and nothing at all on Python 3.
        self.header_metadata.funcs_start_data = bytearray(len(table))

    def adjust_offset(self, off, symoff, shrink, total_shrink):
        """
        Return the file offset `off` shifted for the removed bytes.

        Offsets <= 0 are returned unchanged, offsets greater than `symoff`
        are reduced by `total_shrink`, all others by `shrink`.
        """
        if off <= 0:
            return off
        elif off > symoff:
            return off - total_shrink
        else:
            return off - shrink

    def _shift_offsets(self, lc_id, field_names, symoff):
        """Apply adjust_offset() to the named fields of load command `lc_id`."""
        lc = self.header_metadata.lcs[lc_id]
        for field_name in field_names:
            setattr(lc, field_name,
                    self.adjust_offset(getattr(lc, field_name), symoff,
                                       self.shrink_by, self.total_shrink))

    def remove_funcs(self, funcs, all_funcs, ordered_funcs):
        """
        Account for the removal of every function named in `funcs` and
        write the adjusted header to ``<path>-debloated``.

        funcs         -- names of the functions to remove
        all_funcs     -- mapping from function name to its record
        ordered_funcs -- not used by this method

        The input and output files stay open as ``self.fh`` and
        ``self.out_fh``.
        """
        if not self.header_analyzed:
            self.analyze_header()

        self.fh = open(self.path, 'rb')
        self.read_TEXT_segment(self.fh)

        for f in funcs:
            self.remove_func(f, all_funcs[f], all_funcs)

        # after we've made the changes to remove the functions, adjust the load commands
        self.adjust_lcs()
        self.adjust_function_starts(funcs, all_funcs)

        if self.strategy == STRATEGY_REMOVE and self.verbose:
            print("__text size reduced by: " + str(self.total_fn_size +
                                                   self.total_shrink))

        self.out_fh = open(self.path + "-debloated", 'wb')
        self.write_header(self.out_fh, self.total_shrink)

    def adjust_lcs(self):
        """
        Shift the addresses, offsets and sizes stored in the segment,
        section and link-edit load commands by the accumulated amounts.

        Does nothing for STRATEGY_NOPS.
        """
        if self.strategy == STRATEGY_NOPS:
            return

        # stubs are in ARM mode (4 bytes / insn) and __text is in thumb mode (2 bytes / insn)
        # force the alignment of the stubs region to 4 bytes
        if self.arch == ARCH_ARM and self.total_fn_size % 4 != 0:
            self.text_pad = 2
            self.fsize -= self.text_pad
            self.total_fn_size -= self.text_pad
            self.zero_pad -= self.text_pad

        # we can't possibly remove an odd number of bytes from ARM code, so something has gone wrong
        if self.arch == ARCH_ARM:
            assert (self.total_fn_size % 4 == 0)

        segtext = self.header_metadata.sections["__TEXT"]["__TEXT"]
        secttext = self.header_metadata.sections["__TEXT"]["__text"]

        lcs = self.header_metadata.lcs
        # Captured before LC_SYMTAB itself is adjusted at the end.
        symoff = lcs[LC_SYMTAB].symoff

        for segname, sects in self.header_metadata.sections.items():
            for cmd in sects.values():
                if isinstance(cmd, (segment_command_64, segment_command)):
                    if SEG_DATA in cmd.segname:
                        cmd.fileoff -= self.shrink_by
                    if SEG_LINKEDIT in cmd.segname:
                        cmd.filesize = cmd.filesize - self.total_shrink - self.shrink_by
                elif SEG_TEXT in segname and SECT_TEXT not in cmd.sectname:
                    cmd.addr -= self.total_fn_size
                    cmd.offset -= self.total_fn_size
                else:
                    if cmd.offset > secttext.offset and SEG_TEXT in segname:
                        cmd.offset -= self.total_fn_size

        segtext.filesize -= self.shrink_by
        secttext.size -= self.total_fn_size

        self._shift_offsets(
            LC_DYLD_INFO_ONLY,
            ("rebase_off", "bind_off", "weak_bind_off", "lazy_bind_off",
             "export_off"),
            symoff)
        self._shift_offsets(
            LC_DYSYMTAB,
            ("tocoff", "indirectsymoff", "modtaboff", "extrefsymoff",
             "extreloff", "locreloff"),
            symoff)

        if LC_ENCRYPTION_INFO in lcs:
            self._shift_offsets(LC_ENCRYPTION_INFO, ("cryptoff",), symoff)

        self._shift_offsets(LC_FUNCTION_STARTS, ("dataoff",), symoff)
        self._shift_offsets(LC_DATA_IN_CODE, ("dataoff",), symoff)

        if LC_DYLIB_CODE_SIGN_DRS in lcs:
            self._shift_offsets(LC_DYLIB_CODE_SIGN_DRS, ("dataoff",), symoff)

        if LC_CODE_SIGNATURE in lcs:
            self._shift_offsets(LC_CODE_SIGNATURE, ("dataoff",), symoff)

        lcs[LC_SYMTAB].symoff -= self.shrink_by
        lcs[LC_SYMTAB].stroff -= self.total_shrink

    def remove_func(self, fname, func, all_funcs):
        """
        Update the bookkeeping and the symbol/entry-point load commands
        for the removal of one function.

        fname     -- name of the function
        func      -- its record, indexed with SIZE, ADDR, DIFF, PREV_FUNC
        all_funcs -- mapping from function name to record; must contain
                     "entry0"

        Does nothing for STRATEGY_NOPS.
        """
        fsize = func[SIZE]
        faddr = func[ADDR]
        diff = func[DIFF]

        if self.strategy == STRATEGY_NOPS:
            return

        # A function without a DIFF of its own takes the value of the
        # function recorded as its predecessor.
        if diff == 0:
            prev_func = func[PREV_FUNC]
            if prev_func is not None:
                diff = all_funcs[prev_func][DIFF]

        self.fname = fname
        self.faddr = faddr
        self.fsize = self.shrink_by = fsize + diff
        self.total_fn_size += self.fsize
        self.entry_func = all_funcs["entry0"]

        self.zero_pad += self.shrink_by
        self.shrink_by = 0

        # wipe unnecessary symbols
        diff_syms = len(self.header_metadata.nlists)
        no_syms = self.adjust_symtable(fname)
        diff_syms -= no_syms

        symtab = self.header_metadata.lcs[LC_SYMTAB]
        dysymtab = self.header_metadata.lcs[LC_DYSYMTAB]

        # add the size of the number of symbols wiped from the symtable
        if self.binary_class == CLASS_MACHO64:
            nlist_size = NLIST64_SIZE
        else:
            nlist_size = NLIST32_SIZE
        current_shrink = self.shrink_by + (symtab.nsyms - no_syms) * nlist_size
        self.total_shrink += current_shrink
        symtab.nsyms = no_syms

        # The entry point moves down when the removed function precedes it.
        if self.entry_func[ADDR] > self.faddr:
            self.header_metadata.lcs[LC_MAIN].entryoff -= self.fsize

        dysymtab.nlocalsym = dec_syms(dysymtab.nlocalsym, diff_syms)
        dysymtab.iextdefsym = dec_syms(dysymtab.iextdefsym, diff_syms)
        dysymtab.iundefsym = dec_syms(dysymtab.iundefsym, diff_syms)
예제 #14
0
파일: osx.py 프로젝트: pbraga88/study
def fix_exe_for_code_signing(filename):
    """
    Patch the Mach-O headers of `filename` so the executable can be
    code-signed.

    Out of the box, code signing on OS X does not work when a .pkg archive
    is embedded into (appended to) the executable. The workaround applied
    here:

    - Declare the embedded .pkg archive to be part of the Mach-O
      'String Table'. The string table sits at the end of the executable,
      so its size is simply enlarged to reach the end of the file.
    - Enlarge the __LINKEDIT segment by the same amount.

    This only works if the single-arch thin executable, or the last arch
    slice of a multi-arch fat executable, is not signed: LC_CODE_SIGNATURE
    comes after LC_SYMTAB, and modifying the headers invalidates the
    signature anyway. On modern arm64 macOS a signature is mandatory, so
    compilers add a dummy one at build time; such a signature has to be
    removed before calling this function.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html
    """
    # Size of the file as it is on disk, i.e. with the appended data
    file_size = os.path.getsize(filename)

    # Work on the last header: a thin binary has exactly one slice, a fat
    # binary has several, and the last one is adjacent to the appended data.
    executable = MachO(filename)
    header = executable.headers[-1]
    commands = header.commands

    # The slice must not be signed (a signature would be the last thing in
    # the __LINKEDIT segment).
    signatures = [entry for entry in commands
                  if entry[0].cmd == LC_CODE_SIGNATURE]
    assert len(signatures) == 0, "Executable contains code signature!"

    # Locate the __LINKEDIT segment; segment names are 16-byte zero-padded.
    linkedit_name = b'__LINKEDIT\x00\x00\x00\x00\x00\x00'
    linkedit_matches = [
        entry for entry in commands
        if entry[0].cmd == LC_SEGMENT_64 and entry[1].segname == linkedit_name
    ]
    assert len(linkedit_matches) == 1, "Expected exactly one __LINKEDIT segment!"
    linkedit = linkedit_matches[0][1]  # the segment command itself

    # Locate the SYMTAB command.
    symtab_matches = [entry for entry in commands
                      if entry[0].cmd == LC_SYMTAB]
    assert len(symtab_matches) == 1, "Expected exactly one SYMTAB section!"
    symtab = symtab_matches[0][1]  # the symtab command itself

    # The string table must end exactly where __LINKEDIT ends, i.e. it is
    # the last piece of data declared in the slice.
    linkedit_end = linkedit.fileoff + linkedit.filesize
    strtab_end = symtab.stroff + symtab.strsize
    assert linkedit_end == strtab_end, "Sanity check failed!"

    # Size the headers currently account for (header.offset is zero for
    # thin binaries) versus the real size.
    delta = file_size - (header.offset + linkedit_end)

    # Grow the string table and its parent __LINKEDIT segment.
    symtab.strsize += delta
    linkedit.filesize += delta
    # FIXME: should linkedit.vmsize grow by delta as well? It seems
    # unnecessary, as the extended part does not have to be loaded into the
    # executable's address space.

    # NOTE: per the spec, segments are aligned to page boundaries: 0x4000
    # (16 kB) on arm64, 0x1000 (4 kB) elsewhere. Leaving the size unrounded
    # and unpadded seems to be tolerated - perhaps because this is the last
    # segment?

    # Persist the modified load commands.
    with open(filename, 'rb+') as fp:
        executable.write(fp)

    # The fat header of a multi-arch binary needs the new slice size too.
    # macholib as of version 1.14 cannot do that, so it is done by hand.
    if executable.fat:
        from macholib.mach_o import FAT_MAGIC, FAT_MAGIC_64
        from macholib.mach_o import fat_header, fat_arch, fat_arch64
        with open(filename, 'rb+') as fp:
            # Mirrors MachO.load_fat(). The fat signature was already
            # validated when the file was first loaded.
            fat = fat_header.from_fileobj(fp)
            if fat.magic == FAT_MAGIC:
                arch_type = fat_arch
            elif fat.magic == FAT_MAGIC_64:
                arch_type = fat_arch64
            archs = [arch_type.from_fileobj(fp)
                     for _ in range(fat.nfat_arch)]
            # The last slice now extends to the end of the file.
            archs[-1].size = file_size - archs[-1].offset
            # Rewrite the fat header followed by every arch entry.
            fp.seek(0)
            fat.to_fileobj(fp)
            for arch in archs:
                arch.to_fileobj(fp)