def test_ios13_absolute_method_lists(self):
    """Check selector parsing for the iOS 13 (absolute method list) fixture binary."""
    # Given a binary compiled with a minimum deployment target of iOS 13
    binary = MachoParser(
        TestObjcRuntimeDataParser.IOS13_ABSOLUTE_METHOD_LIST_BIN_PATH
    ).get_arm64_slice()
    # The return value is deliberately unused here
    binary.get_minimum_deployment_target()

    # When the Objective C methods within the binary are parsed
    objc_parser = ObjcRuntimeDataParser(binary, DyldInfoParser(binary))
    selectors_by_selref = objc_parser.selrefs_to_selectors()

    # Then the method structures are correctly parsed
    assert len(selectors_by_selref) == 3

    # (selref address, implementation address or None, is external, selector name)
    expectations = [
        (0x10000D380, None, True, "role"),
        (0x10000D388, None, True, "initWithName:sessionRole:"),
        (0x10000D378, 0x100006354, False, "viewDidLoad"),
    ]
    for selref_addr, imp_addr, is_external, name in expectations:
        selector = selectors_by_selref[VirtualMemoryPointer(selref_addr)]
        if imp_addr is None:
            assert selector.implementation is None
        else:
            assert selector.implementation == VirtualMemoryPointer(imp_addr)
        assert selector.is_external_definition is is_external
        assert selector.name == name
def get_function_boundaries(
        self) -> Set[Tuple[VirtualMemoryPointer, VirtualMemoryPointer]]:
    """Return every (entry point, end address) pair stored in the function_boundaries table."""
    query = "SELECT entry_point, end_address FROM function_boundaries"
    boundaries = set()
    # closing() releases the cursor even if converting a row raises
    with closing(self._db_handle.execute(query)) as rows:
        for start, end in rows:
            boundaries.add(
                (VirtualMemoryPointer(start), VirtualMemoryPointer(end)))
    return boundaries
def get_basic_block_boundaries(
    self, entry_point: VirtualMemoryPointer
) -> List[Tuple[VirtualMemoryPointer, VirtualMemoryPointer]]:
    """List the basic blocks of the function starting at `entry_point`.

    Each element is a (start_addr, end_addr) pair read from the basic_blocks table.
    """
    query = "SELECT start_address, end_address FROM basic_blocks WHERE entry_point=?"
    basic_blocks = []
    with closing(self._db_handle.execute(query, (entry_point, ))) as rows:
        for block_start, block_end in rows:
            basic_blocks.append((VirtualMemoryPointer(block_start),
                                 VirtualMemoryPointer(block_end)))
    return basic_blocks
def string_xrefs_to(
    self, string_literal: str
) -> List[Tuple[VirtualMemoryPointer, VirtualMemoryPointer]]:
    """Retrieve each code location that loads the provided (C or CF) string.

    Args:
        string_literal: The exact string whose loads should be looked up in the string_xrefs table.

    Returns:
        A list of (function entry point, instruction which completes the string load) tuples.
        The list is empty when no row matches.
    """
    query = "SELECT accessor_func_start_address, accessor_address from string_xrefs WHERE string_literal=?"
    # Close the cursor deterministically, like the other DB accessors in this file do
    with closing(self._db_handle.cursor()) as cursor:
        rows = cursor.execute(query, (string_literal, )).fetchall()
    return [(VirtualMemoryPointer(func_start), VirtualMemoryPointer(accessor))
            for func_start, accessor in rows]
def _add_superclass_or_base_class_name_to_classes(
        self, classes: List[ObjcClass],
        dyld_info_parser: DyldInfoParser) -> None:
    """Backfill the superclass name of each ObjC class and the base-class name of each category.

    Two data sources are consulted, depending on where the super/base class lives:
    - Imported super/base classes are named through the map of bound dyld symbols
    - Locally implemented classes are named through the provided list of ObjcClasses
    """
    # Build the (struct address -> class name) map once so local lookups stay cheap
    class_name_by_struct_addr = {}
    for known_class in classes:
        class_name_by_struct_addr[
            known_class.raw_struct.binary_offset] = known_class.name

    # `struct __objc_data.superclass` and `struct __objc_category.base_class` share a memory layout:
    # both sit one 64-bit word after the start of their structure.
    field_offset = sizeof(c_uint64)

    for entry in classes:
        field_addr = VirtualMemoryPointer(entry.raw_struct.binary_offset +
                                          field_offset)

        if field_addr in dyld_info_parser.dyld_stubs_to_symbols:
            # dyld binds imported classrefs, so an import-binding on this field names the class
            resolved_name = dyld_info_parser.dyld_stubs_to_symbols[
                field_addr].name
        else:
            # Otherwise the class should be implemented in this binary, with a matching ObjcClass
            target_struct = VirtualMemoryPointer(
                self.binary.read_word(field_addr))
            try:
                resolved_name = class_name_by_struct_addr[target_struct]
            except KeyError:
                logging.error(
                    f"Failed to find a corresponding ObjC class for ref {target_struct} from "
                    f"{entry}")
                resolved_name = "$_Unknown_Class"

        if not isinstance(entry, ObjcCategory):
            entry.superclass_name = resolved_name
            continue

        entry.base_class = resolved_name
        # Categories are renamed to "<base class> (<category name>)"
        entry.name = f"{resolved_name} ({entry.category_name})"
def classref_for_class_name(
        self, class_name: str) -> Optional[VirtualMemoryPointer]:
    """Given a class name, try to find a classref for it.

    Imported classes are searched first, then classes implemented in this binary.
    Returns None when no classref for the name can be found.
    """
    # Imported class: an imported symbol of that name located in __objc_classrefs
    imported_classrefs = []
    for addr, name in self.imported_symbols_to_symbol_names.items():
        if name != class_name:
            continue
        if self.binary.section_name_for_address(addr) == "__objc_classrefs":
            imported_classrefs.append(addr)
    if imported_classrefs:
        return imported_classrefs[0]

    # TODO(PT): this is expensive! We should do one analysis step of __objc_classrefs and create a map.
    classref_locations, classref_destinations = self.binary.read_pointer_section(
        "__objc_classrefs")

    # Local class: find its struct address, then the classref entry that points at it
    local_struct_addrs = [
        candidate.raw_struct.binary_offset
        for candidate in self.objc_classes() if candidate.name == class_name
    ]
    if not local_struct_addrs:
        # unknown class name
        return None

    class_location = VirtualMemoryPointer(local_struct_addrs[0])
    try:
        classref_index = classref_destinations.index(class_location)
    except ValueError:
        # no classref points at this class
        return None
    return classref_locations[classref_index]
def _stringref_for_cfstring(self,
                            string: str) -> Optional[VirtualMemoryPointer]:
    """Look up the address of the __cfstring entry whose literal equals `string`.

    Returns None when the binary has no __DATA,__cfstring section or no entry matches.
    """
    # TODO(PT): This is SLOW and WASTEFUL!!!
    # These transformations should be done ONCE on initial analysis!
    section = self.binary.section_with_name("__cfstring", "__DATA")
    if not section:
        return None

    if self.binary.is_64bit:
        entry_size = sizeof(CFString64)
    else:
        entry_size = sizeof(CFString32)
    section_base = section.address
    entry_count = int((section.end_address - section.address) / entry_size)

    for index in range(entry_count):
        entry_addr = section_base + (index * entry_size)
        entry = self.binary.read_struct(entry_addr,
                                        CFStringStruct,
                                        virtual=True)
        # check if this is the string the user requested
        if self.binary.read_string_at_address(entry.literal) == string:
            return VirtualMemoryPointer(entry_addr)

    return None
def get_functions(self) -> Set[VirtualMemoryPointer]:
    """Get the function entry points defined in LC_FUNCTION_STARTS.

    This includes objective-c methods.

    Returns:
        A set of VirtualMemoryPointers, one per function entry point. The result is cached on the instance
        once a non-empty set has been computed.
    """
    # TODO(PT): move read_uleb somewhere else
    from .dyld_info_parser import DyldInfoParser

    if self._functions_list:
        return self._functions_list

    # Cannot do anything without LC_FUNCTIONS_START
    function_starts = self._function_starts_cmd
    if not function_starts:
        return set()

    data_size = function_starts.datasize
    encoded_deltas = self.get_contents_from_address(function_starts.dataoff,
                                                    data_size)

    # Each ULEB value is a delta from the previous address, starting at the virtual base
    entry_points = set()
    current_address = int(self.get_virtual_base())
    cursor = 0
    while cursor < data_size:
        delta, cursor = DyldInfoParser.read_uleb(encoded_deltas, cursor)
        current_address += delta
        entry_points.add(VirtualMemoryPointer(current_address))

    self._functions_list = entry_points
    return self._functions_list
def read_pointer_section(
    self, section_name: str
) -> Tuple[List[VirtualMemoryPointer], List[VirtualMemoryPointer]]:
    """Read all the pointers in a section.

    The caller must only pass a `section_name` naming a section that contains nothing but a pointer list.

    Returns two index-matched lists: the first holds the virtual address of each entry in the section, the
    second holds the pointer value stored at that address. Both are empty when the section is not found.
    """
    # PT: Assume a pointer-list-section will always be in __DATA or __DATA_CONST. True as far as I know.
    section = (self.section_with_name(section_name, "__DATA")
               or self.section_with_name(section_name, "__DATA_CONST"))
    if not section:
        # Couldn't find the desired section
        return [], []

    section_base = section.address
    raw_content = section.content
    word_type = self.platform_word_type
    word_size = sizeof(word_type)

    locations: List[VirtualMemoryPointer] = []
    entries: List[VirtualMemoryPointer] = []
    for index in range(int(len(raw_content) / word_size)):
        offset = index * word_size
        # convert section offset of entry to absolute virtual address
        locations.append(VirtualMemoryPointer(section_base + offset))
        word_bytes = bytearray(raw_content[offset:offset + word_size])
        entries.append(
            VirtualMemoryPointer(word_type.from_buffer(word_bytes).value))
    return locations, entries
def get_contents_from_address(self, address: int, size: int, is_virtual: bool = False) -> bytearray:
    """Read `size` bytes starting at `address`.

    `address` is treated as a virtual address when `is_virtual` is true, and as a static file offset otherwise.

    TODO(FS): change all methods that use addresses as ints to the VirtualAddress/StaticAddress class pair
     to better express intent
    """
    if not is_virtual:
        return self.get_bytes(StaticFilePointer(address), size)
    return self.get_content_from_virtual_address(VirtualMemoryPointer(address), size)
def __init__(
    self,
    function_analyzer: "ObjcFunctionAnalyzer",
    instruction: CsInsn,
    patch_msgSend_destination: bool = True,
    container_function_boundary: Tuple[VirtualMemoryPointer,
                                       VirtualMemoryPointer] = None,
) -> None:
    """Classify an unconditional branch as local, anonymous, external C call, or objc_msgSend call.

    Args:
        function_analyzer: Analyzer of the function containing the instruction.
        instruction: The branch instruction; its mnemonic must be an unconditional-branch mnemonic.
        patch_msgSend_destination: When true, objc_msgSend calls get their destination patched via
            `_patch_msgSend_destination`.
        container_function_boundary: Optional (start, end) of the containing function. A destination
            within [start, end) is treated as a local branch without any symbol lookup.

    Raises:
        ValueError: the instruction's mnemonic is not an unconditional branch.
    """
    mnemonic_is_valid = (
        instruction.mnemonic in
        ObjcUnconditionalBranchInstruction.UNCONDITIONAL_BRANCH_MNEMONICS)
    if not mnemonic_is_valid:
        raise ValueError(
            f"ObjcUnconditionalBranchInstruction instantiated with"
            f" invalid mnemonic {instruction.mnemonic}")

    # an unconditional branch has the destination as the only operand
    destination = VirtualMemoryPointer(instruction.operands[0].value.imm)
    super().__init__(instruction, destination)

    self.selref: Optional[ObjcSelref] = None
    self.selector: Optional[ObjcSelector] = None

    analyzer = MachoAnalyzer.get_analyzer(function_analyzer.binary)

    if (container_function_boundary
            and self.destination_address >= container_function_boundary[0]
            and self.destination_address < container_function_boundary[1]):
        # Local basic-block branch within a function
        self.symbol = None
        self.is_external_c_call = False
        self.is_msgSend_call = False
        return

    called_sym = analyzer.callable_symbol_for_address(
        self.destination_address)
    if not called_sym:
        # Branch to an anonymous destination
        # Might be a basic block within a function or some other label
        self.symbol = None
        self.is_external_c_call = False
        self.is_msgSend_call = False
        return

    self.symbol = called_sym.symbol_name
    self.is_external_c_call = called_sym.is_imported

    targets_msgsend = (called_sym.is_imported and called_sym.symbol_name
                       in self.OBJC_MSGSEND_FUNCTIONS)
    if not targets_msgsend:
        self.is_msgSend_call = False
        return

    # An imported objc_msgSend variant is a message send rather than an external C call
    self.is_msgSend_call = True
    self.is_external_c_call = False
    if patch_msgSend_destination:
        self._patch_msgSend_destination(function_analyzer)
def _build_function_boundaries_index(self) -> None:
    """Compute and store the end-of-function address for every entry point in the binary metadata.

    Each function's basic blocks are determined first; the function's end address is then the greatest
    basic-block end address. Both the function boundary and its basic blocks are written to DB tables.
    """
    cursor = self._db_handle.cursor()

    entry_points = sorted(self.get_functions())

    # Computing a function's boundary uses the next entry point address as a hint. For the last entry point
    # in the binary, use the end of its section as the hint.
    if entry_points:
        final_entry = entry_points[-1]
        section = self.binary.section_for_address(final_entry)
        assert section is not None and section.end_address >= final_entry
        entry_points.append(VirtualMemoryPointer(section.end_address))

    for entry_point, boundary_hint in pairwise(entry_points):
        basic_blocks = list(
            self._compute_function_basic_blocks(entry_point, boundary_hint))
        # If we found a function with no code, just skip it
        # This can happen in the assembly unit tests, where we insert a jump to a dummy __text label
        if len(basic_blocks) == 0:
            continue

        function_end = VirtualMemoryPointer(
            max(block_end for _, block_end in basic_blocks))
        cursor.execute(
            "INSERT INTO function_boundaries (entry_point, end_address) VALUES (?, ?)",
            (entry_point, function_end))
        cursor.executemany(
            "INSERT INTO basic_blocks VALUES (?, ?, ?)",
            [(entry_point, block_start, block_end)
             for block_start, block_end in basic_blocks])

    # The cursor is closed inside the DB handle's context manager, exactly as before
    with self._db_handle:
        cursor.close()
def strings_in_func(
    self, func_addr: VirtualMemoryPointer
) -> List[Tuple[VirtualMemoryPointer, str]]:
    """Fetch the list of strings referenced by the provided function.

    Args:
        func_addr: Entry point of the function, matched against accessor_func_start_address.

    Returns:
        A list of (instruction that completes the string load, loaded string literal) tuples.
        The list is empty when no row matches.
    """
    query = "SELECT accessor_address, string_literal from string_xrefs WHERE accessor_func_start_address=?"
    # Materialize the rows before the cursor is closed; closing() matches the other DB accessors in this file
    with closing(self._db_handle.cursor()) as cursor:
        rows = cursor.execute(query, (func_addr, )).fetchall()
    return [(VirtualMemoryPointer(accessor), literal)
            for accessor, literal in rows]
def _parse_stub_from_instructions(instr1: CsInsn, instr2: CsInsn, instr3: CsInsn) -> MachoImpStub:
    """Build a MachoImpStub from the three instructions that make up one import stub.

    Each stub follows one of two patterns:
        pattern 1: nop / ldr x16, <sym> / br x16
        pattern 2: adrp x16, <page> / ldr x16, [x16 <offset>] / br x16

    Args:
        instr1: First instruction of the stub; its mnemonic selects the pattern.
        instr2: Second instruction of the stub.
        instr3: Third instruction of the stub.

    Returns:
        A MachoImpStub pairing the address of `instr1` with the destination computed from the operands.

    Raises:
        NotImplementedError: `instr1` is neither `nop` nor `adrp`.
        RuntimeError: an instruction's mnemonic does not match the pattern selected by `instr1`.
    """
    # TODO(PT): write CsInsn by hand to test this function
    nop_pattern = ["nop", "ldr", "br"]
    adrp_pattern = ["adrp", "ldr", "br"]

    # differentiate between patterns by looking at the opcode of the first instruction
    if instr1.mnemonic == nop_pattern[0]:
        expected_ops = nop_pattern
    elif instr1.mnemonic == adrp_pattern[0]:
        expected_ops = adrp_pattern
    else:
        # unknown stub format
        raise NotImplementedError(
            f"Unknown stub format at {hex(instr1.address)}")

    # sanity check each instruction against the selected pattern
    for idx, (op, expected) in enumerate(zip((instr1, instr2, instr3), expected_ops)):
        if op.mnemonic != expected:
            raise RuntimeError(
                f"Expected instr {hex(op.address)} (idx {idx}) to be {expected}"
                f" while parsing stub, was instead {op.mnemonic}")

    stub_addr = instr1.address
    if expected_ops is nop_pattern:
        # nop/ldr/br pattern: the ldr carries the destination as an immediate
        stub_dest = instr2.operands[1].value.imm
    else:
        # adrp/ldr/br pattern: destination is the adrp page plus the ldr displacement
        stub_dest_page = instr1.operands[1].value.imm
        stub_dest_pageoff = instr2.operands[1].mem.disp
        stub_dest = stub_dest_page + stub_dest_pageoff

    return MachoImpStub(VirtualMemoryPointer(stub_addr), VirtualMemoryPointer(stub_dest))
def get_function_end_address(
    self, entry_point: VirtualMemoryPointer
) -> Optional[VirtualMemoryPointer]:
    """Return the stored end address of the function at `entry_point`, or None if no row exists for it."""
    query = "SELECT end_address FROM function_boundaries WHERE entry_point = ?"
    with closing(self._db_handle.execute(query, (entry_point, ))) as cursor:
        row = cursor.fetchone()
    if row is None:
        return None
    return VirtualMemoryPointer(row[0])
def get_virtual_base(self) -> VirtualMemoryPointer:
    """Retrieve the first virtual address of the Mach-O slice.

    The value is taken from the `vmaddr` of the __TEXT segment and cached on the instance.

    Returns:
        The virtual memory address that the Mach-O slice requests to begin at.

    Raises:
        RuntimeError: the binary has no __TEXT segment.
    """
    if self._virtual_base:
        return self._virtual_base

    text_segment = self.segment_with_name("__TEXT")
    if not text_segment:
        raise RuntimeError("Could not find virtual base because binary has no __TEXT segment.")

    self._virtual_base = VirtualMemoryPointer(text_segment.vmaddr)
    return self._virtual_base
def read_selectors_from_methlist_ptr(
        self, methlist_ptr: VirtualMemoryPointer) -> List[ObjcSelector]:
    """Given the virtual address of a method list, return a List of ObjcSelectors encapsulating each method.

    As a side effect, every selector that has a selref is recorded in the selref pointer -> selector map.

    Raises:
        ValueError: the name of a method entry could not be read.
    """
    methlist = self.binary.read_struct(methlist_ptr,
                                       ObjcMethodListStruct,
                                       virtual=True)
    selectors: List[ObjcSelector] = []

    # the first entry appears directly after the ObjcMethodListStruct
    entry_addr = methlist_ptr + methlist.sizeof
    for _ in range(methlist.methcount):
        method_ent = ObjcMethodStruct.read_method_struct(
            self.binary, entry_addr, methlist_flags=methlist.flags)
        # Byte-align IMP, as the lower bits are used for flags
        method_ent.implementation &= ~0x3  # type: ignore

        symbol_name = self.binary.get_full_string_from_start_address(
            method_ent.name)
        if not symbol_name:
            raise ValueError(
                f"Could not get symbol name for {method_ent.name}")

        # attempt to find corresponding selref
        selref = self._selector_literal_ptr_to_selref_map.get(
            method_ent.name)
        selector = ObjcSelector(
            symbol_name, selref,
            VirtualMemoryPointer(method_ent.implementation))
        selectors.append(selector)

        if selref:
            # The selector may already be in the map from _parse_selrefs(), where its implementation was
            # not yet known (or it is an external method with no local implementation). Such an entry is
            # removed first, then the selector parsed here is stored.
            selector_map = self._selref_ptr_to_selector_map
            selref_addr = selref.source_address
            if (selref_addr in selector_map
                    and not selector_map[selref_addr].implementation):
                del selector_map[selref_addr]
            selector_map[selref_addr] = selector

        entry_addr += method_ent.sizeof
    return selectors
def test_ios14_relative_method_lists(self):
    """Check selector parsing for the iOS 14 (relative method list) fixture binary."""
    # Given a binary compiled with a minimum deployment target of iOS 14
    binary = MachoParser(
        TestObjcRuntimeDataParser.IOS14_RELATIVE_METHOD_LIST_BIN_PATH
    ).get_arm64_slice()
    # The return value is deliberately unused here
    binary.get_minimum_deployment_target()

    # When the Objective C methods within the binary are parsed
    objc_parser = ObjcRuntimeDataParser(binary, DyldInfoParser(binary))
    selectors_by_selref = objc_parser.selrefs_to_selectors()

    # Then the method structures are correctly parsed
    assert len(selectors_by_selref) == 7

    # (selref address, implementation address or None, is external, selector name)
    expectations = [
        (0x10000C0E0, None, True,
         "evaluateJavaScript:inFrame:inContentWorld:completionHandler:"),
        (0x10000C0B0, 0x100007BFC, False, "usesWebView"),
    ]
    for selref_addr, imp_addr, is_external, name in expectations:
        selector = selectors_by_selref[VirtualMemoryPointer(selref_addr)]
        if imp_addr is None:
            assert selector.implementation is None
        else:
            assert selector.implementation == VirtualMemoryPointer(imp_addr)
        assert selector.is_external_definition is is_external
        assert selector.name == name
def read_word(self, address: int, virtual: bool = True, word_type: Any = None) -> int:
    """Read one word from the binary and return its value.

    Args:
        address: Where to read; a virtual address when `virtual` is true, otherwise a static file offset.
        virtual: Selects how `address` is interpreted.
        word_type: ctypes-style type to decode; defaults to the binary's platform word type.

    Raises:
        InvalidAddressError: no bytes could be read at `address`.
    """
    word_type = word_type or self.platform_word_type

    raw_word = (
        self.get_content_from_virtual_address(VirtualMemoryPointer(address), sizeof(word_type))
        if virtual
        else self.get_bytes(StaticFilePointer(address), sizeof(word_type))
    )
    if not raw_word:
        raise InvalidAddressError(f"Could not read word at address {hex(address)}")

    return word_type.from_buffer(bytearray(raw_word)).value
def _protolist_ptr_to_protocol_ptr_list(
        self,
        protolist_ptr: VirtualMemoryPointer) -> List[VirtualMemoryPointer]:
    """Return the protocol pointers held by the ObjcProtocolListStruct at virtual address `protolist_ptr`."""
    protolist = self.binary.read_struct(protolist_ptr,
                                        ObjcProtocolListStruct,
                                        virtual=True)

    # pointers start directly after the 'count' field
    entry_addr = protolist.binary_offset + protolist.sizeof
    protocol_pointers: List[VirtualMemoryPointer] = []
    for _ in range(protolist.count):
        protocol_pointers.append(
            VirtualMemoryPointer(self.binary.read_word(entry_addr)))
        # step to next protocol pointer in list
        entry_addr += sizeof(self.binary.platform_word_type)
    return protocol_pointers
def _parse_objc_classes(self) -> List[ObjcClass]:
    """Read Objective-C class data in __objc_classlist, __objc_data to get classes and selectors in binary.

    Every parsed class is also recorded in the classref -> ObjcClass map.

    Raises:
        RuntimeError: neither the class nor its metaclass yielded a parsed class for a classlist entry.
    """
    logging.debug(
        "Cross-referencing __objc_classlist, __objc_class, and __objc_data entries..."
    )
    parsed_objc_classes = []

    for ptr in self._get_classlist_pointers():
        objc_class = self._get_objc_class_from_classlist_pointer(ptr)
        if not objc_class:
            continue

        # the class's associated struct __objc_data contains the instance method list
        parsed_class = None
        class_data = self._get_objc_data_from_objc_class(objc_class)
        if class_data:
            parsed_class = self._parse_objc_data_entry(objc_class, class_data)

        # the metaclass (which shares the class's name) holds the class methods in its method list
        metaclass = self._get_objc_class_from_classlist_pointer(
            VirtualMemoryPointer(objc_class.metaclass))
        metaclass_data = (self._get_objc_data_from_objc_class(metaclass)
                          if metaclass else None)
        if metaclass_data:
            parsed_metaclass = self._parse_objc_data_entry(
                objc_class, metaclass_data)
            if parsed_class:
                # add in selectors from the metaclass to the real class
                parsed_class.selectors += parsed_metaclass.selectors
            else:
                # no base class found, set the base class to the metaclass
                parsed_class = parsed_metaclass

        # sanity check: ensure we either found a class or metaclass
        if not parsed_class:
            raise RuntimeError(f"Failed to parse classref {hex(ptr)}")

        parsed_objc_classes.append(parsed_class)
        self._classrefs_to_objc_classes[ptr] = parsed_class

    return parsed_objc_classes
def write_bytes(self, data: bytes, address: int, virtual: bool = False) -> "MachoBinary":
    """Return a new MachoBinary whose contents equal this one's with `data` written at `address`.

    `address` is a virtual address when `virtual` is true, otherwise a file offset.

    Note: This will invalidate the binary's code signature, if present.
    """
    # Reading the target range first validates it: an invalid range raises here, before anything is copied
    self.get_contents_from_address(address, len(data), virtual)

    if virtual:
        file_offset = self.file_offset_for_virtual_address(VirtualMemoryPointer(address))
    else:
        file_offset = address

    # Patch a copy so the current binary's cached contents stay untouched
    patched_contents = bytearray(self._cached_binary)
    patched_contents[file_offset : file_offset + len(data)] = data
    return MachoBinary(self.path, patched_contents)
def callable_symbol_for_symbol_name(
        self, symbol_name: str) -> Optional[CallableSymbol]:
    """Retrieve information about a name within the imported or exported symbols tables.

    It's the caller's responsibility to provide a valid callable symbol name.

    Args:
        symbol_name: Name matched against the symbol_name column of named_callable_symbols.

    Returns:
        The matching CallableSymbol, or None when no row has that name.
    """
    # Close the cursor deterministically, like the other DB accessors in this file do
    with closing(self._db_handle.cursor()) as cursor:
        symbols = cursor.execute(
            "SELECT * from named_callable_symbols WHERE symbol_name=?",
            (symbol_name, )).fetchall()
    if not symbols:
        return None

    assert len(
        symbols) == 1, f"Found more than 1 symbol named {symbol_name}?"
    # Columns are consumed positionally as (is_imported, address, symbol_name)
    is_imported, address, name = symbols[0][0], symbols[0][1], symbols[0][2]
    return CallableSymbol(is_imported=bool(is_imported),
                          address=VirtualMemoryPointer(address),
                          symbol_name=name)
def __init__(self, function_analyzer: "ObjcFunctionAnalyzer",
             instruction: CsInsn) -> None:
    """Initialise a conditional branch, reading its destination from the operand chosen by the mnemonic.

    Raises:
        ValueError: the mnemonic is not a conditional branch, or belongs to neither operand-count group.
    """
    branch_cls = ObjcConditionalBranchInstruction
    mnemonic = instruction.mnemonic

    if mnemonic not in branch_cls.CONDITIONAL_BRANCH_MNEMONICS:
        raise ValueError(
            f"ObjcConditionalBranchInstruction instantiated with"
            f" invalid mnemonic {instruction.mnemonic}")

    # the operand index holding the destination depends on the mnemonic
    if mnemonic in branch_cls.SINGLE_OP_MNEMONICS:
        dest_op_idx = 1
    elif mnemonic in branch_cls.DOUBLE_OP_MNEMONICS:
        dest_op_idx = 2
    else:
        raise ValueError(
            f"Unknown conditional mnemonic {instruction.mnemonic}")

    destination = VirtualMemoryPointer(
        instruction.operands[dest_op_idx].value.imm)
    ObjcBranchInstruction.__init__(self, instruction, destination)
def callable_symbol_for_address(
    self, branch_destination: VirtualMemoryPointer
) -> Optional[CallableSymbol]:
    """Retrieve information about a callable branch destination.

    It's the caller's responsibility to provide a valid branch destination with a symbol associated with it.

    Args:
        branch_destination: Address matched against the address column of named_callable_symbols.

    Returns:
        The matching CallableSymbol, or None when no row has that address.
    """
    # Close the cursor deterministically, like the other DB accessors in this file do
    with closing(self._db_handle.cursor()) as cursor:
        symbols = cursor.execute(
            "SELECT * from named_callable_symbols WHERE address=?",
            (branch_destination, )).fetchall()
    if not symbols:
        return None

    assert len(
        symbols) == 1, f"Found more than 1 symbol at {branch_destination}?"
    # Columns are consumed positionally as (is_imported, address, symbol_name)
    is_imported, address, name = symbols[0][0], symbols[0][1], symbols[0][2]
    return CallableSymbol(is_imported=bool(is_imported),
                          address=VirtualMemoryPointer(address),
                          symbol_name=name)
def class_name_for_class_pointer(
        self, classref: VirtualMemoryPointer) -> Optional[str]:
    """Given a classref, return the name of the class.

    Handles imported classes as well as classes implemented within the binary. For local classes the
    argument may be either the address of the class's struct or a classref pointer that holds that address.
    Returns None when nothing matches.
    """

    def local_class_at(struct_addr):
        # Find the single ObjcClass whose raw struct lives at struct_addr, if any
        matches = [
            candidate for candidate in self.objc_classes()
            if candidate.raw_struct.binary_offset == struct_addr
        ]
        if not len(matches):
            return None
        assert len(matches) == 1
        return matches[0]

    # Did the caller provide a classref for an imported class?
    if classref in self.imported_symbols_to_symbol_names:
        return self.imported_symbols_to_symbol_names[classref]

    # First, treat the argument as the direct address of a local class's struct
    direct_match = local_class_at(classref)
    if direct_match is not None:
        return direct_match.name

    # Then, treat it as a classref pointer and follow it
    try:
        dereferenced_classref = VirtualMemoryPointer(
            self.binary.read_word(classref))
    except InvalidAddressError:
        # Invalid classref
        return None

    dereferenced_match = local_class_at(dereferenced_classref)
    if dereferenced_match is not None:
        return dereferenced_match.name

    # Invalid classref
    return None
def get_full_string_from_start_address(self,
                                       start_address: int,
                                       virtual: bool = True
                                       ) -> Optional[str]:
    """Return a string containing the bytes from start_address up to the next NULL character.

    The content is read in chunks, starting at 16 bytes and doubling each time no terminator is found.
    Returns None if the bytes before the terminator cannot be decoded, i.e. the address does not point to
    a UTF-8 encoded string.
    """
    chunk_len = 16
    collected = bytearray()

    while True:
        if virtual:
            chunk = self.get_content_from_virtual_address(
                VirtualMemoryPointer(start_address), chunk_len)
        else:
            chunk = self.get_bytes(StaticFilePointer(start_address),
                                   chunk_len)

        for byte in chunk:
            if byte == 0x00:
                # read full string!
                try:
                    return collected.decode()
                except UnicodeDecodeError:
                    # decoding failed, so the address may not actually point to a string
                    return None
            collected.append(byte)

        # no terminator in [start_address:start_address + chunk_len]; move past it
        start_address += chunk_len
        # double search space for next iteration
        chunk_len *= 2
def _patch_msgSend_destination(
        self, function_analyzer: "ObjcFunctionAnalyzer") -> None:
    """Point `destination_address` at the IMP of the selector passed to objc_msgSend, when it is local.

    Note that afterwards `destination_address` is *not* the instruction's real destination (a stub for
    _objc_msgSend); the function the selector corresponds to is the more valuable piece of information.

    Raises:
        ValueError: this instruction is not a `bl`/`b` objc_msgSend call.
    """
    # validate instruction
    is_patchable = (self.is_msgSend_call
                    and self.raw_instr.mnemonic in ["bl", "b"]
                    and self.symbol in self.OBJC_MSGSEND_FUNCTIONS)
    if not is_patchable:
        raise ValueError(
            f"cannot parse objc_msgSend destination on non-msgSend instruction"
            f" {function_analyzer.format_instruction(self.raw_instr)}")

    try:
        selref_ptr = function_analyzer.get_objc_selref(self)
        selector = function_analyzer.macho_analyzer.selector_for_selref(
            selref_ptr)
        if not selector:
            raise RuntimeError(
                f"Couldn't get sel for selref ptr {selref_ptr}")

        # a selector without a local IMP is defined in a class outside this binary
        self.is_external_objc_call = selector.is_external_definition

        # Only patch destination_address if the implementation is in this binary.
        # Otherwise, destination_address will continue to point to __imp_stubs_objc_msgSend
        local_imp = selector.implementation
        if local_imp:
            self.destination_address = local_imp

        self.selref = selector.selref
        self.selector = selector
    except RuntimeError:
        # TODO(PT): Should this ever be hit?
        self.is_external_objc_call = True
        self.destination_address = VirtualMemoryPointer(0)
def __init__(self, instruction: CsInsn) -> None:
    """Wrap a raw instruction, recording its address and default (non-msgSend, unnamed) call metadata."""
    self.raw_instr = instruction
    self.address = VirtualMemoryPointer(instruction.address)

    # Defaults; the constructors shown elsewhere in this file overwrite these for branch instructions
    self.symbol: Optional[str] = None
    self.is_msgSend_call: bool = False
def _objc_msgSend_addr(self) -> Optional[VirtualMemoryPointer]:
    """Return the address recorded for the `_objc_msgSend` symbol, or None when there is no such symbol."""
    msgsend_symbol = self.callable_symbol_for_symbol_name("_objc_msgSend")
    return VirtualMemoryPointer(
        msgsend_symbol.address) if msgsend_symbol else None