def modify_object_macholib(cur_path, paths_to_paths):
    """
    Rewrite the Mach-O load commands of a binary using a path mapping.

    This function is used when installing Mach-O buildcaches on Linux: it
    rewrites the Mach-O loader commands for dependency library paths of
    Mach-O binaries and the id path for Mach-O libraries. Rewriting of
    rpaths is handled by replace_prefix_bin.

    Args:
        cur_path: path of the Mach-O binary to be modified
        paths_to_paths: dictionary mapping paths in the old install layout
            to paths in the new install layout

    Returns:
        None. Failures while writing the file back are ignored, so the
        operation is best effort.
    """
    dll = MachO(cur_path)

    # ``dict.get`` yields None for paths without an entry in the mapping.
    # NOTE(review): macholib presumably leaves such commands untouched --
    # confirm against the macholib documentation.
    dll.rewriteLoadCommands(paths_to_paths.get)

    try:
        # The context manager closes the file even if writing fails; the
        # previous code leaked the handle in that case.
        with open(dll.filename, 'rb+') as f:
            # Write sequence follows the macholib example: rewind and write
            # once per header, then move to the end of the file and flush.
            for _ in dll.headers:
                f.seek(0)
                dll.write(f)
            f.seek(0, 2)
            f.flush()
    except Exception:
        # Deliberately best effort: a binary that cannot be rewritten is
        # skipped silently, exactly as before.
        pass
def rewriteFramework(framework, frameworkMap):
    """
    Rewrite the load commands of a framework's main binary in place.

    The binary is expected at ``<framework>/<name>``, where ``<name>`` is the
    framework directory's base name without its extension.

    Args:
        framework: path to the framework directory
        frameworkMap: mapping from a dependency's file name (last path
            component) to the replacement path; lookups that miss yield None
    """
    basename = os.path.splitext(os.path.basename(framework))[0]
    dyld = os.path.abspath(os.path.join(framework, basename))
    macho = MachO(dyld)

    def changefunc(key):
        # A reference to the binary itself is kept as is.
        if key == dyld:
            return dyld
        # Everything else is looked up by file name only.
        return frameworkMap.get(os.path.basename(key))

    macho.rewriteLoadCommands(changefunc)

    # Close the file deterministically; the handle used to be leaked.
    with open(dyld, "rb+") as fh:
        macho.write(fh)
def rewriteFramework(framework, frameworkMap):
    """
    Rewrite the load commands of a framework's main binary in place.

    The binary is expected at ``<framework>/<name>``, where ``<name>`` is the
    framework directory's base name without its extension.

    Args:
        framework: path to the framework directory
        frameworkMap: mapping from a dependency's file name (last path
            component) to the replacement path; lookups that miss yield None
    """
    basename = os.path.splitext(os.path.basename(framework))[0]
    dyld = os.path.abspath(os.path.join(framework, basename))
    macho = MachO(dyld)

    def changefunc(key):
        # A reference to the binary itself is kept as is.
        if key == dyld:
            return dyld
        # Everything else is looked up by file name only.
        return frameworkMap.get(os.path.basename(key))

    macho.rewriteLoadCommands(changefunc)

    # Close the file deterministically; the handle used to be leaked.
    with open(dyld, "rb+") as fh:
        macho.write(fh)
def set_macos_sdk_version(filename, major, minor, revision):
    """
    Replace the macOS SDK version recorded in a binary.

    Each version component must fit into one byte (0..255). The three
    components are packed as ``major.minor.revision`` into a single integer
    and stored in the ``sdk`` field of the version load command.

    NOTE: currently, only the version in the first arch slice is modified.
    """
    # Each component occupies a single byte of the packed value.
    assert 0 <= major <= 255, "Invalid major version value!"
    assert 0 <= minor <= 255, "Invalid minor version value!"
    assert 0 <= revision <= 255, "Invalid revision value!"

    # Parse the binary and pick its first slice.
    macho_file = MachO(filename)
    first_slice = macho_file.headers[0]

    # Locate the version command via the helper, then store the packed
    # version in the command's data entry.
    version_entry = _find_version_cmd(first_slice)
    packed_version = (major << 16) | (minor << 8) | revision
    version_entry[1].sdk = packed_version

    # Persist the modified headers.
    with open(macho_file.filename, 'rb+') as out:
        macho_file.write(out)
def fix_exe_for_code_signing(filename):
    """
    Fixes the Mach-O headers to make code signing possible.

    Code signing on OS X does not work out of the box with embedding
    .pkg archive into the executable.

    The fix is done this way:
    - Make the embedded .pkg archive part of the Mach-O 'String Table'.
      'String Table' is at end of the OS X exe file so just change the size
      of the table to cover the end of the file.
    - Fix the size of the __LINKEDIT segment.

    The __LINKEDIT segment is located by its segment name. If no segment of
    that name is found, the fourth load command is used, as in earlier
    versions of this function.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html
    """
    exe_data = MachO(filename)
    # '0' - Exe contains only one architecture.
    header = exe_data.headers[0]
    # Every load command is a tuple: (cmd_metadata, segment, [section1, section2])
    cmds = header.commands
    file_size = header.size

    linkedit = None
    for c in cmds:
        cmd_name = c[0].get_cmd_name()
        if cmd_name == 'LC_SYMTAB':
            # Make the embedded .pkg archive part of the 'String Table':
            # grow the table so that it covers the rest of the file.
            data = c[1]
            data.strsize = file_size - data.stroff
        elif linkedit is None and cmd_name in ('LC_SEGMENT', 'LC_SEGMENT_64'):
            # Segment names are NUL padded; depending on the Python version
            # they are byte strings or text.
            segname = c[1].segname
            if isinstance(segname, bytes):
                segname = segname.decode('ascii', 'replace')
            if segname.rstrip('\x00') == '__LINKEDIT':
                linkedit = c[1]

    if linkedit is None:
        # Historical assumption: __LINKEDIT is the 4th load command. That
        # breaks when the segment order differs, hence the lookup above.
        linkedit = cmds[3][1]

    # Fix the size of the __LINKEDIT segment.
    new_segsize = file_size - linkedit.fileoff
    linkedit.filesize = new_segsize
    linkedit.vmsize = new_segsize

    # Write changes back; the file is closed even if writing fails.
    with open(exe_data.filename, 'rb+') as fp:
        exe_data.write(fp)
def fix_exe_for_code_signing(filename):
    """
    Fixes the Mach-O headers to make code signing possible.

    Code signing on OS X does not work out of the box with embedding
    .pkg archive into the executable.

    The fix is done this way:
    - Make the embedded .pkg archive part of the Mach-O 'String Table'.
      'String Table' is at end of the OS X exe file so just change the size
      of the table to cover the end of the file.
    - Fix the size of the __LINKEDIT segment.

    The __LINKEDIT segment is located by its segment name. If no segment of
    that name is found, the fourth load command is used, as in earlier
    versions of this function.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html
    """
    exe_data = MachO(filename)
    # '0' - Exe contains only one architecture.
    header = exe_data.headers[0]
    # Every load command is a tuple: (cmd_metadata, segment, [section1, section2])
    cmds = header.commands
    file_size = header.size

    linkedit = None
    for c in cmds:
        cmd_name = c[0].get_cmd_name()
        if cmd_name == 'LC_SYMTAB':
            # Make the embedded .pkg archive part of the 'String Table':
            # grow the table so that it covers the rest of the file.
            data = c[1]
            data.strsize = file_size - data.stroff
        elif linkedit is None and cmd_name in ('LC_SEGMENT', 'LC_SEGMENT_64'):
            # Segment names are NUL padded; depending on the Python version
            # they are byte strings or text.
            segname = c[1].segname
            if isinstance(segname, bytes):
                segname = segname.decode('ascii', 'replace')
            if segname.rstrip('\x00') == '__LINKEDIT':
                linkedit = c[1]

    if linkedit is None:
        # Historical assumption: __LINKEDIT is the 4th load command. That
        # breaks when the segment order differs, hence the lookup above.
        linkedit = cmds[3][1]

    # Fix the size of the __LINKEDIT segment.
    new_segsize = file_size - linkedit.fileoff
    linkedit.filesize = new_segsize
    linkedit.vmsize = new_segsize

    # Write changes back; the file is closed even if writing fails.
    with open(exe_data.filename, 'rb+') as fp:
        exe_data.write(fp)
def modify_object_macholib(cur_path, old_dir, new_dir):
    """
    Modify the Mach-O binary ``cur_path`` by replacing ``old_dir`` with
    ``new_dir`` in its load commands, using py-macholib.

    Every path handed to the rewrite callback has each occurrence of
    ``old_dir`` substituted by ``new_dir``. According to the original notes
    this covers LC_ID_DYLIB, LC_LOAD_DYLIB and LC_RPATH entries
    (NOTE(review): which commands are visited is decided by macholib's
    ``rewriteLoadCommands`` -- confirm).

    Args:
        cur_path: path of the binary to modify; files ending in ``.o`` are
            skipped
        old_dir: install prefix to be replaced
        new_dir: replacement install prefix

    Raises:
        MissingMacholibException: if macholib cannot be imported

    Failures while writing the file back are ignored (best effort).
    """
    if cur_path.endswith('.o'):
        return
    try:
        from macholib.MachO import MachO
    except ImportError as e:
        raise MissingMacholibException(e)

    def match_func(cpath):
        return cpath.replace(old_dir, new_dir)

    dll = MachO(cur_path)
    dll.rewriteLoadCommands(match_func)
    try:
        # The context manager closes the file even if writing fails; the
        # previous code leaked the handle in that case.
        with open(dll.filename, 'rb+') as f:
            # Write sequence follows the macholib example.
            for _ in dll.headers:
                f.seek(0)
                dll.write(f)
            f.seek(0, 2)
            f.flush()
    except Exception:
        # Deliberately best effort, exactly as before.
        pass
def mac_set_relative_dylib_deps(libname, distname):
    """
    Make the dynamic library dependencies of `libname` relative on Mac OS X.

    Dependencies are rewritten to start with '@loader_path', i.e. they are
    resolved relative to the location of the library itself. This avoids
    relying on the environment variable DYLD_LIBRARY_PATH, which has known
    issues, and is the more flexible mechanism.

    `distname` is the path of the library relative to the dist directory of
    the frozen executable. It determines how many directory levels have to
    be climbed from '@loader_path' for binaries that do not live directly in
    the dist directory. E.g. qt4 plugins are not in the same directory as the
    Qt*.dylib files; without '@loader_path/../..' Mac OS X would not be able
    to resolve their shared library dependencies and the plugins would not
    be loaded.
    """
    from macholib import util
    from macholib.MachO import MachO

    # Ignore bootloader otherwise PyInstaller fails with exception like
    # 'ValueError: total_size > low_offset (288 > 0)'
    if os.path.basename(libname) in _BOOTLOADER_FNAMES:
        return

    # Number of '../' components needed to reach the directory holding the
    # shared libraries, e.g. ./qt4_plugins/images/ -> ./../../
    # It stays empty when distname is a bare file name.
    dist_dir = os.path.dirname(distname)
    if dist_dir:
        parent_dir = (os.pardir + os.sep) * len(dist_dir.split(os.sep))
    else:
        parent_dir = ''

    # The older python.org builds that use system Tcl/Tk framework have
    # their _tkinter.cpython-*-darwin.so library linked against
    # /Library/Frameworks/Tcl.framework/Versions/8.5/Tcl and
    # /Library/Frameworks/Tk.framework/Versions/8.5/Tk, although the actual
    # frameworks are located in /System/Library/Frameworks. Therefore, they
    # slip through the in_system_path() check and are exempted manually.
    exempt_fragments = (
        '/Library/Frameworks/Tcl.framework/',
        '/Library/Frameworks/Tk.framework/',
    )

    def match_func(pth):
        """
        Return the '@loader_path'-relative replacement for `pth`, or None
        for system libraries and exempted frameworks (left unchanged).
        """
        if util.in_system_path(pth):
            return None
        for fragment in exempt_fragments:
            if fragment in pth:
                return None
        # Relative path based on the location of the loading binary.
        return os.path.join('@loader_path', parent_dir, os.path.basename(pth))

    # Rewrite mach headers with @loader_path.
    macho_lib = MachO(libname)
    macho_lib.rewriteLoadCommands(match_func)

    # Write changes into file. Write code is based on macholib example.
    try:
        with open(macho_lib.filename, 'rb+') as out:
            for _ in macho_lib.headers:
                out.seek(0)
                macho_lib.write(out)
            out.seek(0, 2)
            out.flush()
    except Exception:
        pass
class CMachoFuzzer:
    """
    Mutate random fields of a Mach-O file's headers and load commands.

    ``fuzz()`` parses the input with macholib, applies a random number of
    field mutations, writes the mutated copy to the output file and records
    a description of the mutations in ``<output>.diff``.
    """

    def __init__(self):
        # Parsed MachO object; set by fuzz().
        self.macho = None
        self.fuzz_properties = ["headers"]
        self.fuzz_sub_properties = {
            "headers": ["header", "commands", "headers"]
        }
        # One list of description strings per mutation that was kept.
        self.changes = []
        # Names of the mach_header field mutations already applied; only
        # these count towards the target number of changes in fuzz().
        self.change_list = []

    def do_fuzz_headers(self):
        """Apply (at most) one random mutation to a random header."""
        # Select a random header
        header = random.choice(self.macho.headers)
        idx = self.macho.headers.index(header)
        # Tentatively record the change; it is dropped again below whenever
        # no mutation is actually performed.
        self.changes.append(["Header %d" % idx])

        prop = random.choice(self.fuzz_sub_properties["headers"])
        if prop == "header":
            fields = random.choice(header.header._fields_)
            field = fields[0]
            change_name = "header %d field %s" % (idx, field)
            if change_name in self.change_list or field in BANNED_FIELDS:
                # Already applied, or a field that must not be touched.
                del self.changes[-1]
                return

            self.changes[-1].append("Field %s" % field)
            # Was exec("header.header.<field> = %d" % value); int() mirrors
            # the integer coercion done by the '%d' format.
            setattr(header.header, field, int(get_random_value(fields[1])))
            self.change_list.append(change_name)
        elif prop == "commands":
            cmd = random.choice(header.commands)
            idx = header.commands.index(cmd)
            self.changes[-1].append("Command %d" % idx)

            subidx = random.randint(0, len(cmd) - 1)
            subcmd = cmd[subidx]
            if '_fields_' in dir(subcmd):
                if len(subcmd._fields_) > 0:
                    fields = random.choice(subcmd._fields_)
                    field = fields[0]
                    self.changes[-1].append("Field %s" % field)
                    # Was eval("subcmd.<field>").
                    str_type = str(type(getattr(subcmd, field)))
                    if str_type in SUPPORTED_FIELD_TYPES:
                        # Was exec("subcmd.<field> = " + str(value)).
                        # NOTE(review): assumes get_random_value() returns a
                        # number, for which both forms are equivalent.
                        setattr(subcmd, field, get_random_value(fields[1]))
                    else:
                        # Unsupported field type
                        del self.changes[-1]
                else:
                    print("Ignoring empty subcmd %s" % (subcmd,))
                    del self.changes[-1]
            elif type(subcmd) is str:
                # Unsupported (by macholib) string sub-command
                del self.changes[-1]
            else:
                print("%s %s" % (type(subcmd), subcmd))
                if type(subcmd) is list and len(subcmd) > 0:
                    element = random.choice(subcmd)
                    self.changes[-1].append(
                        "List element %d" % subcmd.index(element))
                    fields = random.choice(element._fields_)
                    field_name = fields[0]
                    self.changes[-1].append("Field %s" % field_name)
                    # Was exec("field.<name> = " + str(value)).
                    setattr(element, field_name,
                            get_random_value(fields[1]))
                else:
                    del self.changes[-1]
        else:
            # "headers" (not yet supported) and anything unknown: no change.
            del self.changes[-1]

    def do_fuzz_internal(self):
        """Run one fuzzing step on a randomly chosen top-level property."""
        assert (self.macho is not None)
        element = random.choice(self.fuzz_properties)
        if element == "headers":
            self.do_fuzz_headers()
        else:
            raise Exception("Unknown element to fuzz %s" % repr(element))

    def fuzz(self, filename, output_filename):
        """
        Create a mutated copy of ``filename`` at ``output_filename``.

        Also writes ``output_filename + ".diff"`` describing the applied
        changes and finally runs ``radiff2`` on both files (skipped when the
        tool cannot be started).
        """
        import subprocess

        self.macho = MachO(filename)

        # Try up to 5 times the wanted number of changes, because many
        # attempts do not result in a counted mutation.
        changes = random.randint(1, 25)
        for _ in range(changes * 5):
            self.do_fuzz_internal()
            if len(self.change_list) == changes:
                break

        # Copy the contents of the original file to the output file. The
        # input is read before the output is truncated, and both handles are
        # closed deterministically.
        with open(filename, "rb") as src:
            original = src.read()
        with open(output_filename, "wb") as dst:
            dst.write(original)

        # Update its contents
        with open(output_filename, "rb+") as out:
            self.macho.write(out)

        # And write the .diff file (text mode: the content is text).
        with open(output_filename + ".diff", "w") as diff_file:
            diff_file.write(
                "# Original file created by 'MachO Mutator' was %s\n"
                % filename)
            for change in self.changes:
                line = "# CHANGE: %s" % ", ".join(change)
                print(line)
                diff_file.write(line + "\n")

        # Argument list instead of a shell string, so file names containing
        # spaces or shell metacharacters are passed through verbatim.
        try:
            subprocess.call(["radiff2", filename, output_filename])
        except OSError:
            # radiff2 is not available; the comparison is optional.
            pass
def mac_set_relative_dylib_deps(libname, distname):
    """
    Make the dynamic library dependencies of `libname` relative on Mac OS X.

    Dependencies are rewritten to start with '@loader_path', i.e. they are
    resolved relative to the location of the library itself. This avoids
    relying on the environment variable DYLD_LIBRARY_PATH, which has known
    issues, and is the more flexible mechanism.

    `distname` is the path of the library relative to the dist directory of
    the frozen executable. It determines how many directory levels have to
    be climbed from '@loader_path' for binaries that do not live directly in
    the dist directory. E.g. qt4 plugins are not in the same directory as the
    Qt*.dylib files; without '@loader_path/../..' Mac OS X would not be able
    to resolve their shared library dependencies and the plugins would not
    be loaded.
    """
    from macholib import util
    from macholib.MachO import MachO

    # Ignore bootloader otherwise PyInstaller fails with exception like
    # 'ValueError: total_size > low_offset (288 > 0)'
    if os.path.basename(libname) in _BOOTLOADER_FNAMES:
        return

    # Number of '../' components needed to reach the directory holding the
    # shared libraries, e.g. ./qt4_plugins/images/ -> ./../../
    # It stays empty when distname is a bare file name.
    dist_dir = os.path.dirname(distname)
    if dist_dir:
        parent_dir = (os.pardir + os.sep) * len(dist_dir.split(os.sep))
    else:
        parent_dir = ''

    def match_func(pth):
        """
        Return the '@loader_path'-relative replacement for `pth`, or None
        for system libraries, which keep their absolute path.
        """
        if util.in_system_path(pth):
            return None
        # Relative path based on the location of the loading binary.
        return os.path.join('@loader_path', parent_dir, os.path.basename(pth))

    # Rewrite mach headers with @loader_path.
    macho_lib = MachO(libname)
    macho_lib.rewriteLoadCommands(match_func)

    # Write changes into file. Write code is based on macholib example.
    try:
        with open(macho_lib.filename, 'rb+') as out:
            for _ in macho_lib.headers:
                out.seek(0)
                macho_lib.write(out)
            out.seek(0, 2)
            out.flush()
    except Exception:
        pass
def fix_exe_for_code_signing(filename):
    """
    Adjust the Mach-O headers so that the executable can be code-signed.

    Code signing on Mac OS does not work out of the box when a .pkg archive
    is embedded (appended) into the executable. Two things are fixed:

    - The embedded .pkg archive is made part of the Mach-O 'String Table'.
      The string table sits at the end of the Mac OS executable, so its size
      is simply extended to cover the end of the file.
    - The size of the __LINKEDIT segment is corrected accordingly.

    Note: this works only if the single-arch thin executable, or the last
    arch slice of a multi-arch fat executable, is not signed, because
    LC_CODE_SIGNATURE comes after LC_SYMTAB, and because modifying the
    headers invalidates the code signature. On modern arm64 macOS a code
    signature is mandatory, so compilers create a dummy signature at build
    time; such a signature must be removed before calling this function.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html
    """
    # Size of the file with the data already appended.
    file_size = os.path.getsize(filename)

    # A thin binary has one slice, a fat binary several; the slice adjacent
    # to the appended data is always the last one.
    executable = MachO(filename)
    header = executable.headers[-1]

    def commands_of_type(cmd_id):
        # All load-command tuples of the slice with the given command id.
        return [entry for entry in header.commands if entry[0].cmd == cmd_id]

    # Sanity check: the slice must not be signed (otherwise the signature's
    # section comes last in the __LINKEDIT segment).
    signatures = commands_of_type(LC_CODE_SIGNATURE)
    assert len(signatures) == 0, "Executable contains code signature!"

    # __LINKEDIT segment, matched by its 16-byte zero padded name.
    linkedit_name = b'__LINKEDIT\x00\x00\x00\x00\x00\x00'
    linkedit_matches = [
        entry for entry in commands_of_type(LC_SEGMENT_64)
        if entry[1].segname == linkedit_name
    ]
    assert len(linkedit_matches) == 1, "Expected exactly one __LINKEDIT segment!"
    linkedit_seg = linkedit_matches[0][1]  # the segment command entry

    # SYMTAB section.
    symtab_matches = commands_of_type(LC_SYMTAB)
    assert len(symtab_matches) == 1, "Expected exactly one SYMTAB section!"
    symtab_sec = symtab_matches[0][1]  # the symtab command entry

    # The string table ends the SYMTAB section, which in turn ends the
    # __LINKEDIT segment, which ends the file (last or only slice). The
    # declared sizes are not trusted: codesign under Mac OS 10.13 may leave
    # a __LINKEDIT size pointing beyond the end of file when it removes a
    # signature (issue #6167). Hence both sizes are recomputed from the
    # final file length and the respective start offsets.
    symtab_sec.strsize = file_size - symtab_sec.stroff
    linkedit_seg.filesize = file_size - linkedit_seg.fileoff

    # The in-memory size is the file size rounded up to a full page.
    if _get_arch_string(header.header).startswith('arm64'):
        page_size = 0x4000
    else:
        page_size = 0x1000
    page_count = math.ceil(linkedit_seg.filesize / page_size)
    linkedit_seg.vmsize = page_count * page_size

    # NOTE: according to spec, segments need to be aligned to page
    # boundaries: 0x4000 (16 kB) for arm64, 0x1000 (4 kB) for other arches.
    # Rounding and padding the segment's file size appears to be
    # unnecessary - perhaps because it is the last one?

    # Write the modified load commands.
    with open(filename, 'rb+') as fp:
        executable.write(fp)

    if not executable.fat:
        return

    # Fat binaries additionally need their fat header adjusted. macholib as
    # of version 1.14 does not support this, so it is done by hand.
    from macholib.mach_o import (FAT_MAGIC, FAT_MAGIC_64, fat_arch, fat_arch64, fat_header)
    with open(filename, 'rb+') as fp:
        # Modelled after MachO.load_fat(). The fat header's signature was
        # already validated when the file was loaded above.
        fat = fat_header.from_fileobj(fp)
        if fat.magic == FAT_MAGIC:
            archs = []
            for _ in range(fat.nfat_arch):
                archs.append(fat_arch.from_fileobj(fp))
        elif fat.magic == FAT_MAGIC_64:
            archs = []
            for _ in range(fat.nfat_arch):
                archs.append(fat_arch64.from_fileobj(fp))

        # The last slice now extends to the end of the file.
        last_arch = archs[-1]
        last_arch.size = file_size - last_arch.offset

        # Write the fat header and all arch entries back.
        fp.seek(0)
        fat.to_fileobj(fp)
        for entry in archs:
            entry.to_fileobj(fp)
class CMachoFuzzer:
    """
    Mutate random fields of a Mach-O file's headers and load commands.

    ``fuzz()`` parses the input with macholib, applies a random number of
    field mutations, writes the mutated copy to the output file and records
    a description of the mutations in ``<output>.diff``.
    """

    def __init__(self):
        # Parsed MachO object; set by fuzz().
        self.macho = None
        self.fuzz_properties = ["headers"]
        self.fuzz_sub_properties = {
            "headers": ["header", "commands", "headers"]
        }
        # One list of description strings per mutation that was kept.
        self.changes = []
        # Names of the mach_header field mutations already applied; only
        # these count towards the target number of changes in fuzz().
        self.change_list = []

    def do_fuzz_headers(self):
        """Apply (at most) one random mutation to a random header."""
        # Select a random header
        header = random.choice(self.macho.headers)
        idx = self.macho.headers.index(header)
        # Tentatively record the change; it is dropped again below whenever
        # no mutation is actually performed.
        self.changes.append(["Header %d" % idx])

        prop = random.choice(self.fuzz_sub_properties["headers"])
        if prop == "header":
            fields = random.choice(header.header._fields_)
            field = fields[0]
            change_name = "header %d field %s" % (idx, field)
            if change_name in self.change_list or field in BANNED_FIELDS:
                # Already applied, or a field that must not be touched.
                del self.changes[-1]
                return

            self.changes[-1].append("Field %s" % field)
            # Was exec("header.header.<field> = %d" % value); int() mirrors
            # the integer coercion done by the '%d' format.
            setattr(header.header, field, int(get_random_value(fields[1])))
            self.change_list.append(change_name)
        elif prop == "commands":
            cmd = random.choice(header.commands)
            idx = header.commands.index(cmd)
            self.changes[-1].append("Command %d" % idx)

            subidx = random.randint(0, len(cmd) - 1)
            subcmd = cmd[subidx]
            if '_fields_' in dir(subcmd):
                if len(subcmd._fields_) > 0:
                    fields = random.choice(subcmd._fields_)
                    field = fields[0]
                    self.changes[-1].append("Field %s" % field)
                    # Was eval("subcmd.<field>").
                    str_type = str(type(getattr(subcmd, field)))
                    if str_type in SUPPORTED_FIELD_TYPES:
                        # Was exec("subcmd.<field> = " + str(value)).
                        # NOTE(review): assumes get_random_value() returns a
                        # number, for which both forms are equivalent.
                        setattr(subcmd, field, get_random_value(fields[1]))
                    else:
                        # Unsupported field type
                        del self.changes[-1]
                else:
                    print("Ignoring empty subcmd %s" % (subcmd,))
                    del self.changes[-1]
            elif type(subcmd) is str:
                # Unsupported (by macholib) string sub-command
                del self.changes[-1]
            else:
                print("%s %s" % (type(subcmd), subcmd))
                if type(subcmd) is list and len(subcmd) > 0:
                    element = random.choice(subcmd)
                    self.changes[-1].append(
                        "List element %d" % subcmd.index(element))
                    fields = random.choice(element._fields_)
                    field_name = fields[0]
                    self.changes[-1].append("Field %s" % field_name)
                    # Was exec("field.<name> = " + str(value)).
                    setattr(element, field_name,
                            get_random_value(fields[1]))
                else:
                    del self.changes[-1]
        else:
            # "headers" (not yet supported) and anything unknown: no change.
            del self.changes[-1]

    def do_fuzz_internal(self):
        """Run one fuzzing step on a randomly chosen top-level property."""
        assert (self.macho is not None)
        element = random.choice(self.fuzz_properties)
        if element == "headers":
            self.do_fuzz_headers()
        else:
            raise Exception("Unknown element to fuzz %s" % repr(element))

    def fuzz(self, filename, output_filename):
        """
        Create a mutated copy of ``filename`` at ``output_filename``.

        Also writes ``output_filename + ".diff"`` describing the applied
        changes and finally runs ``radiff2`` on both files (skipped when the
        tool cannot be started).
        """
        import subprocess

        self.macho = MachO(filename)

        # Try up to 5 times the wanted number of changes, because many
        # attempts do not result in a counted mutation.
        changes = random.randint(1, 25)
        for _ in range(changes * 5):
            self.do_fuzz_internal()
            if len(self.change_list) == changes:
                break

        # Copy the contents of the original file to the output file. The
        # input is read before the output is truncated, and both handles are
        # closed deterministically.
        with open(filename, "rb") as src:
            original = src.read()
        with open(output_filename, "wb") as dst:
            dst.write(original)

        # Update its contents
        with open(output_filename, "rb+") as out:
            self.macho.write(out)

        # And write the .diff file (text mode: the content is text).
        with open(output_filename + ".diff", "w") as diff_file:
            diff_file.write(
                "# Original file created by 'MachO Mutator' was %s\n"
                % filename)
            for change in self.changes:
                line = "# CHANGE: %s" % ", ".join(change)
                print(line)
                diff_file.write(line + "\n")

        # Argument list instead of a shell string, so file names containing
        # spaces or shell metacharacters are passed through verbatim.
        try:
            subprocess.call(["radiff2", filename, output_filename])
        except OSError:
            # radiff2 is not available; the comparison is optional.
            pass
class HeaderMangler(object):
    """
    Adjusts the Mach-O header bookkeeping (load commands, symbol table,
    offsets) of an executable after functions were removed from it.

    The entry point is remove_funcs(), which processes each function with
    remove_func(), then fixes up the load commands and writes the updated
    header to ``<exec_path>-debloated``.
    """

    def __init__(self, binary_class, arch, exec_path, strategy, verbose=False):
        """
        binary_class -- compared against CLASS_MACHO / CLASS_MACHO64
        arch         -- ARCH_ARM or ARCH_X86_64; anything else raises
        exec_path    -- path of the executable to process
        strategy     -- compared against STRATEGY_NOPS / STRATEGY_REMOVE
        verbose      -- enables the size report printed by remove_funcs()
        """
        self.path = exec_path
        self.mangled_text_segment = False
        self.header_analyzed = False  # set by analyze_header()
        self.shrink_by = 0
        self.header_metadata = None  # HeaderMetadata, set by analyze_header()
        self.strategy = strategy
        self.verbose = verbose
        # Name, address and size (incl. 'diff') of the function most
        # recently handled by remove_func().
        self.fname = None
        self.faddr = 0
        self.fsize = 0
        self.arch = arch
        self.binary_class = binary_class
        self.total_shrink = 0  # accumulated in remove_func()
        self.entry_func = []
        self.total_fn_size = 0  # accumulated size of handled functions
        self.zero_pad = 0
        self.text_pad = 0
        self.TEXT_orig = None  # raw bytes read by read_TEXT_segment()
        self.fh = None  # input file handle, opened in remove_funcs()
        self.out_fh = None  # output file handle, opened in remove_funcs()
        # NOTE(review): redundant, binary_class was already assigned above.
        self.binary_class = binary_class
        if arch == ARCH_ARM:
            self.code_mangler = ARM_Mangler()
        elif arch == ARCH_X86_64:
            self.code_mangler = X86_64_Mangler()
        else:
            # NOTE(review): the concatenation itself raises TypeError if
            # arch is not a string -- confirm the type of the ARCH_* values.
            raise Exception("Unknown arch: " + arch)

    def analyze_header(self):
        """Parse the executable and load its header tables."""
        self.macho = MachO(self.path)
        self.header_metadata = HeaderMetadata(self.macho, self.binary_class)
        self.header_metadata.read_tables()
        self.header_analyzed = True

    def write_header(self, fh, total_shrink):
        """Reduce the first header's size by total_shrink and write to fh."""
        h = self.macho.headers[0]
        h.size -= total_shrink
        self.macho.write(fh)

    def adjust_nl_pointers(self, nldata, shirnk):
        """Return nldata unchanged; the second argument is unused."""
        return nldata

    def adjust_la_pointers(self, ladata, shrink):
        """
        Return a copy of ladata in which every pointer entry is reduced by
        shrink. Entries are LA_PTR_SIZE_64 wide for CLASS_MACHO64 binaries
        and LA_PTR_SIZE_32 wide otherwise.
        """
        off = 0
        new_ladata = ""
        # NOTE(review): 'size' uses the 32-bit width only for CLASS_MACHO,
        # while the loop below uses the 64-bit path only for CLASS_MACHO64;
        # the two agree as long as these are the only two classes.
        size = LA_PTR_SIZE_32 if self.binary_class == CLASS_MACHO else LA_PTR_SIZE_64
        while off < len(ladata):
            if self.binary_class == CLASS_MACHO64:
                lptr = la_ptr_64.from_str(ladata[off:(off + size)])
                # swap, subtract, swap back
                lptr.ptr = big_swap_u64(lptr.ptr)
                lptr.ptr -= shrink
                lptr.ptr = little_swap_u64(lptr.ptr)
                new_ladata += lptr.to_str()
                off += LA_PTR_SIZE_64
            else:
                lptr = la_ptr_32.from_str(ladata[off:(off + size)])
                lptr.ptr = big_swap_u32(lptr.ptr)
                lptr.ptr -= shrink
                lptr.ptr = little_swap_u32(lptr.ptr)
                new_ladata += lptr.to_str()
                off += LA_PTR_SIZE_32
        return new_ladata

    def read_TEXT_segment(self, in_fh):
        """
        Read TEXTsize - textoffset bytes starting at textoffset from in_fh
        into self.TEXT_orig.
        """
        in_fh.seek(self.header_metadata.textoffset)
        self.TEXT_orig = in_fh.read(self.header_metadata.TEXTsize - self.header_metadata.textoffset)

    def adjust_symtable(self, fname):
        """
        Return the number of entries in the symbol list.

        NOTE(review): the unconditional return right after computing ntype
        makes everything below it unreachable, so the symbol list is
        currently never modified. The dead code would drop the entries of
        fname and lower n_value of other entries by total_fn_size.
        """
        ntype = 36 if self.binary_class == CLASS_MACHO64 else 15
        return len(self.header_metadata.nlists)
        copy = list(self.header_metadata.nlists)
        for i in xrange(len(copy) - 1, -1, -1):
            if fname in self.header_metadata.nlists[i][1]:
                if self.header_metadata.nlists[i][0].n_type == ntype:
                    del self.header_metadata.nlists[i - 1]
                    del self.header_metadata.nlists[i - 1]
                    del self.header_metadata.nlists[i - 1]
                    del self.header_metadata.nlists[i - 1]
                    i -= 4
                else:
                    del self.header_metadata.nlists[i]
            elif self.header_metadata.nlists[i][0].n_type == ntype and "_" in self.header_metadata.nlists[i][1]:
                self.header_metadata.nlists[i][0].n_value -= self.total_fn_size
                self.header_metadata.nlists[i - 1][0].n_value -= self.total_fn_size
            elif "_main" in self.header_metadata.nlists[i][1]:
                self.header_metadata.nlists[i][0].n_value -= self.total_fn_size
                self.header_metadata.nlists[i - 1][0].n_value -= self.total_fn_size
        return len(self.header_metadata.nlists)

    def adjust_function_starts(self, removed_funcs, all_funcs):
        """
        Walk the ULEB128-encoded function-starts data and replace
        header_metadata.funcs_start_data.

        NOTE(review): funcs_ba, assembled in the loop, is never used; the
        stored value is built separately below (see the TODO).
        """
        funcs = self.header_metadata.read_func_starts()
        x = decode_uleb128(funcs[FUNCS_START_BA], 0)
        funcs_ba = bytearray()
        addr = x[ULEB_DATA]
        old_offset = 0
        i = 0
        found = False
        while x[ULEB_OFF] != funcs[FUNCS_START_SIZE]:
            # Is the function at the current address one of the removed?
            found = False
            for func in removed_funcs:
                if all_funcs[func][ADDR] == addr:
                    found = True
                    break
            old_offset = x[ULEB_OFF]
            x = decode_uleb128(funcs[FUNCS_START_BA], x[ULEB_OFF])
            # each decoded value is added to the running address
            addr += x[ULEB_DATA]
            if found:
                pass
                #print "addr: " + hex(x[ULEB_OFF] - old_offset)
                #funcs_ba += bytearray(x[ULEB_OFF] - old_offset)
            else:
                funcs_ba += funcs[FUNCS_START_BA][i:old_offset]
            i = old_offset
        # TODO calculate the function starts and write them
        temp = bytearray()
        for i in xrange(0, len(funcs[FUNCS_START_BA])):
            temp += bytes(0)
        self.header_metadata.funcs_start_data = temp

    def adjust_offset(self, off, symoff, shrink, total_shrink):
        """
        Return the adjusted file offset: unchanged if off <= 0, reduced by
        total_shrink if off lies beyond symoff, reduced by shrink otherwise.
        """
        if off <= 0:
            return off
        elif off > symoff:
            return off - total_shrink
        else:
            return off - shrink

    def remove_funcs(self, funcs, all_funcs, ordered_funcs):
        """
        Process every function named in funcs and write the updated header
        to ``self.path + "-debloated"``.

        funcs         -- names of the functions to remove (keys of all_funcs)
        all_funcs     -- mapping from function name to its record
        ordered_funcs -- unused here

        NOTE(review): self.fh and self.out_fh are left open by this method.
        """
        if not self.header_analyzed:
            self.analyze_header()
        self.fh = open(self.path, 'rb')
        self.read_TEXT_segment(self.fh)
        for f in funcs:
            self.remove_func(f, all_funcs[f], all_funcs)
        # after we've made the changes to remove the functions, adjust the load commands
        self.adjust_lcs()
        self.adjust_function_starts(funcs, all_funcs)
        if self.strategy == STRATEGY_REMOVE and self.verbose:
            print "__text size reduced by: " + str(self.total_fn_size + self.total_shrink)
        self.out_fh = open(self.path + "-debloated", 'wb')
        self.write_header(self.out_fh, self.total_shrink)

    def adjust_lcs(self):
        """
        Shift the sizes and offsets stored in the load commands by the
        amounts accumulated in remove_func(). Does nothing for
        STRATEGY_NOPS.
        """
        if self.strategy == STRATEGY_NOPS:
            return
        # stubs are in ARM mode (4 bytes / insn) and __text is in thumb mode (2 bytes / insn)
        # force the alignment of the stubs region to 4 bytes
        if self.arch == ARCH_ARM and self.total_fn_size % 4 != 0:
            self.text_pad = 2
            self.fsize -= self.text_pad
            self.total_fn_size -= self.text_pad
            self.zero_pad -= self.text_pad
        # we can't possibly remove an odd number of bytes from ARM code, so something has gone wrong
        if self.arch == ARCH_ARM:
            assert (self.total_fn_size % 4 == 0)
        segtext = self.header_metadata.sections["__TEXT"]["__TEXT"]
        secttext = self.header_metadata.sections["__TEXT"]["__text"]
        symoff = self.header_metadata.lcs[LC_SYMTAB].symoff
        # 'sections' maps segment name -> {name -> command}; the inner dict
        # holds both segment commands and section entries.
        for segname, _ in self.header_metadata.sections.iteritems():
            for sectname, cmd in self.header_metadata.sections[segname].iteritems():
                if isinstance(cmd, segment_command_64) or isinstance(cmd, segment_command):
                    if SEG_DATA in cmd.segname:
                        cmd.fileoff -= self.shrink_by
                    if SEG_LINKEDIT in cmd.segname:
                        cmd.filesize = cmd.filesize - self.total_shrink - self.shrink_by
                elif SEG_TEXT in segname and SECT_TEXT not in cmd.sectname:
                    # sections of the text segment other than __text move
                    # down by the removed code size
                    cmd.addr -= self.total_fn_size
                    cmd.offset -= self.total_fn_size
                else:
                    if cmd.offset > secttext.offset and SEG_TEXT in segname:
                        cmd.offset -= self.total_fn_size
        segtext.filesize -= self.shrink_by
        secttext.size -= self.total_fn_size
        # All remaining file offsets go through adjust_offset().
        self.header_metadata.lcs[LC_DYLD_INFO_ONLY].rebase_off = self.adjust_offset(
            self.header_metadata.lcs[LC_DYLD_INFO_ONLY].rebase_off, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYLD_INFO_ONLY].bind_off = self.adjust_offset(
            self.header_metadata.lcs[LC_DYLD_INFO_ONLY].bind_off, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYLD_INFO_ONLY].weak_bind_off = self.adjust_offset(
            self.header_metadata.lcs[LC_DYLD_INFO_ONLY].weak_bind_off, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYLD_INFO_ONLY].lazy_bind_off = self.adjust_offset(
            self.header_metadata.lcs[LC_DYLD_INFO_ONLY].lazy_bind_off, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYLD_INFO_ONLY].export_off = self.adjust_offset(
            self.header_metadata.lcs[LC_DYLD_INFO_ONLY].export_off, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYSYMTAB].tocoff = self.adjust_offset(
            self.header_metadata.lcs[LC_DYSYMTAB].tocoff, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYSYMTAB].indirectsymoff = self.adjust_offset(
            self.header_metadata.lcs[LC_DYSYMTAB].indirectsymoff, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYSYMTAB].modtaboff = self.adjust_offset(
            self.header_metadata.lcs[LC_DYSYMTAB].modtaboff, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYSYMTAB].extrefsymoff = self.adjust_offset(
            self.header_metadata.lcs[LC_DYSYMTAB].extrefsymoff, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYSYMTAB].extreloff = self.adjust_offset(
            self.header_metadata.lcs[LC_DYSYMTAB].extreloff, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DYSYMTAB].locreloff = self.adjust_offset(
            self.header_metadata.lcs[LC_DYSYMTAB].locreloff, symoff, self.shrink_by, self.total_shrink)
        # The following commands are only adjusted when present.
        if LC_ENCRYPTION_INFO in self.header_metadata.lcs:
            self.header_metadata.lcs[LC_ENCRYPTION_INFO].cryptoff = self.adjust_offset(
                self.header_metadata.lcs[LC_ENCRYPTION_INFO].cryptoff, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_FUNCTION_STARTS].dataoff = self.adjust_offset(
            self.header_metadata.lcs[LC_FUNCTION_STARTS].dataoff, symoff, self.shrink_by, self.total_shrink)
        self.header_metadata.lcs[LC_DATA_IN_CODE].dataoff = self.adjust_offset(
            self.header_metadata.lcs[LC_DATA_IN_CODE].dataoff, symoff, self.shrink_by, self.total_shrink)
        if LC_DYLIB_CODE_SIGN_DRS in self.header_metadata.lcs:
            self.header_metadata.lcs[LC_DYLIB_CODE_SIGN_DRS].dataoff = self.adjust_offset(
                self.header_metadata.lcs[LC_DYLIB_CODE_SIGN_DRS].dataoff, symoff, self.shrink_by, self.total_shrink)
        if LC_CODE_SIGNATURE in self.header_metadata.lcs:
            self.header_metadata.lcs[LC_CODE_SIGNATURE].dataoff = self.adjust_offset(
                self.header_metadata.lcs[LC_CODE_SIGNATURE].dataoff, symoff, self.shrink_by, self.total_shrink)
        # symoff is updated last because it served as the threshold above.
        self.header_metadata.lcs[LC_SYMTAB].symoff -= self.shrink_by
        self.header_metadata.lcs[LC_SYMTAB].stroff -= self.total_shrink

    def remove_func(self, fname, func, all_funcs):
        """
        Account for the removal of a single function in the header data.
        Does nothing for STRATEGY_NOPS.

        fname     -- name of the function
        func      -- the function's record, indexed by SIZE, ADDR, DIFF and
                     PREV_FUNC
        all_funcs -- mapping from function name to record; must contain
                     "entry0"
        """
        fsize = func[SIZE]
        faddr = func[ADDR]
        diff = func[DIFF]
        if self.strategy == STRATEGY_NOPS:
            return
        # A zero 'diff' is replaced by the previous function's, if any.
        if diff == 0:
            prev_func = func[PREV_FUNC]
            if prev_func is not None:
                diff = all_funcs[prev_func][DIFF]
        self.fname = fname
        self.faddr = faddr
        self.fsize = self.shrink_by = fsize + diff
        self.total_fn_size += self.fsize
        self.entry_func = all_funcs["entry0"]
        self.zero_pad += self.shrink_by
        # shrink_by is reset here, so it contributes 0 to current_shrink
        # below.
        self.shrink_by = 0
        segtext = self.header_metadata.sections["__TEXT"]["__TEXT"]
        secttext = self.header_metadata.sections["__TEXT"]["__text"]
        # wipe unnecessary symbols
        diff_syms = len(self.header_metadata.nlists)
        no_syms = self.adjust_symtable(fname)
        diff_syms -= no_syms
        # add the size of the number of symbols wiped from the symtable
        if self.binary_class == CLASS_MACHO64:
            current_shrink = self.shrink_by + (
                self.header_metadata.lcs[LC_SYMTAB].nsyms - no_syms) * NLIST64_SIZE
        else:
            current_shrink = self.shrink_by + (
                self.header_metadata.lcs[LC_SYMTAB].nsyms - no_syms) * NLIST32_SIZE
        self.total_shrink += current_shrink
        self.header_metadata.lcs[LC_SYMTAB].nsyms = no_syms
        # The entry offset only moves when the function precedes the entry
        # function.
        if self.entry_func[ADDR] > self.faddr:
            self.header_metadata.lcs[LC_MAIN].entryoff -= self.fsize
        self.header_metadata.lcs[LC_DYSYMTAB].nlocalsym = dec_syms(
            self.header_metadata.lcs[LC_DYSYMTAB].nlocalsym, diff_syms)
        self.header_metadata.lcs[LC_DYSYMTAB].iextdefsym = dec_syms(
            self.header_metadata.lcs[LC_DYSYMTAB].iextdefsym, diff_syms)
        self.header_metadata.lcs[LC_DYSYMTAB].iundefsym = dec_syms(
            self.header_metadata.lcs[LC_DYSYMTAB].iundefsym, diff_syms)
def fix_exe_for_code_signing(filename):
    """
    Fixes the Mach-O headers to make code signing possible.

    Code signing on OS X does not work out of the box with embedding
    .pkg archive into the executable.

    The fix is done this way:
    - Make the embedded .pkg archive part of the Mach-O 'String Table'.
      'String Table' is at end of the OS X exe file so just change the size
      of the table to cover the end of the file.
    - Fix the size of the __LINKEDIT segment.

    Note: the above fix works only if the single-arch thin executable or
    the last arch slice in a multi-arch fat executable is not signed,
    because LC_CODE_SIGNATURE comes after LC_SYMTAB, and because modification
    of headers invalidates the code signature. On modern arm64 macOS, code
    signature is mandatory, and therefore compilers create a dummy
    signature when executable is built. In such cases, that signature
    needs to be removed before this function is called.

    Mach-O format specification:

    http://developer.apple.com/documentation/Darwin/Reference/ManPages/man5/Mach-O.5.html

    Inputs
    filename: path of the executable to modify in place

    Raises AssertionError when the last slice carries a code signature, when
    it does not have exactly one 64-bit __LINKEDIT segment and exactly one
    LC_SYMTAB command, when the string table does not end the __LINKEDIT
    segment, or when the fat header magic is not recognized. These checks are
    raised explicitly (instead of via `assert` statements) so that they stay
    active under `python -O`; proceeding without them would write incorrect
    headers into the file.
    """
    # Estimate the file size after data was appended
    file_size = os.path.getsize(filename)

    # Take the last available header. A single-arch thin binary contains a
    # single slice, while a multi-arch fat binary contains multiple, and we
    # need to modify the last one, which is adjacent to the appended data.
    executable = MachO(filename)
    header = executable.headers[-1]

    # Sanity check: ensure the executable slice is not signed (otherwise
    # signature's section comes last in the __LINKEDIT segment).
    if any(cmd[0].cmd == LC_CODE_SIGNATURE for cmd in header.commands):
        raise AssertionError("Executable contains code signature!")

    # Find __LINKEDIT segment by name (16-byte zero padded string)
    linkedit_name = b'__LINKEDIT\x00\x00\x00\x00\x00\x00'
    linkedit_cmds = [
        cmd for cmd in header.commands
        if cmd[0].cmd == LC_SEGMENT_64 and cmd[1].segname == linkedit_name
    ]
    if len(linkedit_cmds) != 1:
        raise AssertionError("Expected exactly one __LINKEDIT segment!")
    linkedit_seg = linkedit_cmds[0][1]  # Take the segment command entry

    # Find SYMTAB section
    symtab_cmds = [cmd for cmd in header.commands if cmd[0].cmd == LC_SYMTAB]
    if len(symtab_cmds) != 1:
        raise AssertionError("Expected exactly one SYMTAB section!")
    symtab_sec = symtab_cmds[0][1]  # Take the symtab command entry

    # Sanity check; the string table is located at the end of the SYMTAB
    # section, which in turn is the last section in the __LINKEDIT segment
    linkedit_end = linkedit_seg.fileoff + linkedit_seg.filesize
    if linkedit_end != symtab_sec.stroff + symtab_sec.strsize:
        raise AssertionError("Sanity check failed!")

    # Compute the old/declared file size (header.offset is zero for
    # single-arch thin binaries)
    old_file_size = header.offset + linkedit_end
    delta = file_size - old_file_size

    # Expand the string table in SYMTAB section...
    symtab_sec.strsize += delta
    # .. as well as its parent __LINKEDIT segment
    linkedit_seg.filesize += delta
    # FIXME: do we actually need to adjust in-memory size as well
    # (i.e. `linkedit_seg.vmsize += delta`)? It seems unnecessary, as we
    # have no use for the extended part being loaded in the executable's
    # address space...

    # NOTE: according to spec, segments need to be aligned to page
    # boundaries: 0x4000 (16 kB) for arm64, 0x1000 (4 kB) for other arches.
    # But it seems we can get away without rounding and padding the segment
    # size - perhaps because it's the last one?

    # Write changes
    with open(filename, 'rb+') as fp:
        executable.write(fp)

    # In fat binaries, we also need to adjust the fat header. macholib as
    # of version 1.14 does not support this, so we need to do it ourselves...
    if executable.fat:
        from macholib.mach_o import FAT_MAGIC, FAT_MAGIC_64
        from macholib.mach_o import fat_header, fat_arch, fat_arch64

        with open(filename, 'rb+') as fp:
            # Taken from MachO.load_fat() implementation. The fat
            # header's signature has already been validated when we
            # loaded the file for the first time, so the final branch is
            # only a guard against leaving the arch list undefined.
            fat = fat_header.from_fileobj(fp)
            if fat.magic == FAT_MAGIC:
                arch_struct = fat_arch
            elif fat.magic == FAT_MAGIC_64:
                arch_struct = fat_arch64
            else:
                raise AssertionError("Unexpected fat header magic!")
            archs = [arch_struct.from_fileobj(fp)
                     for _ in range(fat.nfat_arch)]

            # Adjust the size in the fat header for the last slice
            last_arch = archs[-1]
            last_arch.size = file_size - last_arch.offset

            # Now write the fat headers back to the file
            fp.seek(0)
            fat.to_fileobj(fp)
            for entry in archs:
                entry.to_fileobj(fp)