예제 #1
0
    def test_ios13_absolute_method_lists(self):
        """Check selref -> selector parsing against the iOS 13 absolute-method-list fixture binary."""
        # Given a binary compiled with a minimum deployment target of iOS 13
        binary = MachoParser(
            TestObjcRuntimeDataParser.IOS13_ABSOLUTE_METHOD_LIST_BIN_PATH
        ).get_arm64_slice()
        binary.get_minimum_deployment_target()

        # When the Objective C methods within the binary are parsed
        objc_parser = ObjcRuntimeDataParser(binary, DyldInfoParser(binary))
        selref_selector_map = objc_parser.selrefs_to_selectors()

        # Then the method structures are correctly parsed
        assert len(selref_selector_map) == 3

        # Each row: (selref address, expected implementation, defined externally?, selector name)
        expected_selectors = [
            (0x10000D380, None, True, "role"),
            (0x10000D388, None, True, "initWithName:sessionRole:"),
            (0x10000D378, VirtualMemoryPointer(0x100006354), False,
             "viewDidLoad"),
        ]
        for selref_addr, expected_imp, expected_external, expected_name in expected_selectors:
            selector = selref_selector_map[VirtualMemoryPointer(selref_addr)]
            if expected_imp is None:
                assert selector.implementation is None
            else:
                assert selector.implementation == expected_imp
            assert selector.is_external_definition is expected_external
            assert selector.name == expected_name
예제 #2
0
    def get_function_boundaries(
            self) -> Set[Tuple[VirtualMemoryPointer, VirtualMemoryPointer]]:
        """Return every (entry point, end address) pair stored in the function_boundaries table.
        """
        query = "SELECT entry_point, end_address FROM function_boundaries"
        boundaries: Set[Tuple[VirtualMemoryPointer,
                              VirtualMemoryPointer]] = set()

        # The cursor is closed as soon as all rows have been consumed
        with closing(self._db_handle.execute(query)) as rows:
            for entry_point, end_address in rows:
                boundaries.add((VirtualMemoryPointer(entry_point),
                                VirtualMemoryPointer(end_address)))
        return boundaries
예제 #3
0
 def get_basic_block_boundaries(
     self, entry_point: VirtualMemoryPointer
 ) -> List[Tuple[VirtualMemoryPointer, VirtualMemoryPointer]]:
     """Return the (start_addr, end_addr) basic blocks recorded for the function starting at `entry_point`.

     The blocks are read from the basic_blocks table and returned in the order the query yields them.
     """
     query = "SELECT start_address, end_address FROM basic_blocks WHERE entry_point=?"
     blocks: List[Tuple[VirtualMemoryPointer, VirtualMemoryPointer]] = []

     # The cursor is closed as soon as all rows have been consumed
     with closing(self._db_handle.execute(query, (entry_point, ))) as rows:
         for start_address, end_address in rows:
             blocks.append((VirtualMemoryPointer(start_address),
                            VirtualMemoryPointer(end_address)))
     return blocks
예제 #4
0
 def string_xrefs_to(
     self, string_literal: str
 ) -> List[Tuple[VirtualMemoryPointer, VirtualMemoryPointer]]:
     """Retrieve each code location that loads the provided (C or CF) string.

     Returns a list of (function entry point, instruction which completes the string load) tuples, one per row
     of the string_xrefs table whose string_literal column equals `string_literal`. The list is empty when
     no row matches.
     """
     cursor = self._db_handle.cursor()
     try:
         rows = cursor.execute(
             "SELECT accessor_func_start_address, accessor_address from string_xrefs WHERE string_literal=?",
             (string_literal, ),
         ).fetchall()
     finally:
         # Release the cursor as soon as the rows have been materialised, even if the query raised
         cursor.close()

     return [(VirtualMemoryPointer(func_start), VirtualMemoryPointer(accessor))
             for func_start, accessor in rows]
예제 #5
0
    def _add_superclass_or_base_class_name_to_classes(
            self, classes: List[ObjcClass],
            dyld_info_parser: DyldInfoParser) -> None:
        """Backfill the superclass name of every ObjC class, and the base-class name of every ObjC category.

        Two data sources are consulted for each entry in `classes`:
        - The dyld binding map (`dyld_info_parser.dyld_stubs_to_symbols`), for super/base classes which are imported
        - The struct-address -> name map built from `classes` itself, for super/base classes implemented locally

        Categories additionally have their `name` rewritten to "<base class> (<category name>)".
        If neither source knows the referenced class, an error is logged and "$_Unknown_Class" is used.
        """
        # For efficiency, index the locally implemented classes by the address of their structure
        local_class_names = {}
        for parsed in classes:
            local_class_names[parsed.raw_struct.binary_offset] = parsed.name

        # `struct __objc_data.superclass` and `struct __objc_category.base_class` share a memory layout:
        # both fields sit one 64-bit word after the start of their structure.
        parent_field_offset = sizeof(c_uint64)

        for entry in classes:
            parent_field_addr = VirtualMemoryPointer(
                entry.raw_struct.binary_offset + parent_field_offset)

            if parent_field_addr in dyld_info_parser.dyld_stubs_to_symbols:
                # An imported parent is bound by dyld at load time, so the field itself carries an import-binding
                parent_name = dyld_info_parser.dyld_stubs_to_symbols[
                    parent_field_addr].name
            else:
                # Otherwise the field holds the address of a class structure implemented in this binary
                parent_struct_addr = VirtualMemoryPointer(
                    self.binary.read_word(parent_field_addr))
                try:
                    parent_name = local_class_names[parent_struct_addr]
                except KeyError:
                    logging.error(
                        f"Failed to find a corresponding ObjC class for ref {parent_struct_addr} from "
                        f"{entry}")
                    parent_name = "$_Unknown_Class"

            if not isinstance(entry, ObjcCategory):
                entry.superclass_name = parent_name
                continue

            entry.base_class = parent_name
            # Update the name attribute to hold the parsed category name
            entry.name = f"{parent_name} ({entry.category_name})"
예제 #6
0
    def classref_for_class_name(
            self, class_name: str) -> Optional[VirtualMemoryPointer]:
        """Given a class name, try to find a classref for it.

        Imported classes are looked up first: the first imported symbol named `class_name` whose address lies in
        the `__objc_classrefs` section is returned. Otherwise, the class is looked up among the classes implemented
        in this binary, and the address of the `__objc_classrefs` entry pointing at it is returned.

        Returns None if the class name is unknown, or if no classref points at the local class.
        """
        # Is it an imported class? Stop at the first match instead of collecting every candidate.
        imported_classref = next(
            (addr
             for addr, name in self.imported_symbols_to_symbol_names.items()
             if name == class_name and self.binary.section_name_for_address(
                 addr) == "__objc_classrefs"),
            None,
        )
        if imported_classref is not None:
            return imported_classref

        # Is it a local class?
        local_struct_addr = next(
            (x.raw_struct.binary_offset
             for x in self.objc_classes() if x.name == class_name),
            None,
        )
        if local_struct_addr is None:
            # unknown class name
            return None
        class_location = VirtualMemoryPointer(local_struct_addr)

        # TODO(PT): this is expensive! We should do one analysis step of __objc_classrefs and create a map.
        # The section is only read once we know the class is local, so unknown names never pay for it.
        classref_locations, classref_destinations = self.binary.read_pointer_section(
            "__objc_classrefs")

        try:
            # A single scan both tests for membership and finds the position
            classref_index = classref_destinations.index(class_location)
        except ValueError:
            # no classref refers to this class
            return None
        return classref_locations[classref_index]
예제 #7
0
    def _stringref_for_cfstring(self,
                                string: str) -> Optional[VirtualMemoryPointer]:
        """Find the address of the CFString structure in __cfstring whose literal equals `string`.

        Returns None if the binary has no (__DATA, __cfstring) section, or if no entry's literal matches.
        """
        # TODO(PT): This is SLOW and WASTEFUL!!!
        # These transformations should be done ONCE on initial analysis!
        section = self.binary.section_with_name("__cfstring", "__DATA")
        if not section:
            return None

        if self.binary.is_64bit:
            entry_size = sizeof(CFString64)
        else:
            entry_size = sizeof(CFString32)

        first_entry_addr = section.address
        entry_count = (section.end_address - section.address) // entry_size

        # Walk the section one CFString structure at a time
        for entry_index in range(entry_count):
            entry_addr = first_entry_addr + (entry_index * entry_size)
            entry = self.binary.read_struct(entry_addr,
                                            CFStringStruct,
                                            virtual=True)

            # check if this is the string the user requested
            if self.binary.read_string_at_address(entry.literal) == string:
                return VirtualMemoryPointer(entry_addr)

        return None
예제 #8
0
    def get_functions(self) -> Set[VirtualMemoryPointer]:
        """Get the set of function entry points defined in LC_FUNCTION_STARTS. This includes objective-c methods.

        The load command's payload is a sequence of ULEB128-encoded deltas. The first delta is relative to the
        binary's virtual base, and each later delta is relative to the previous function's entry point.
        A zero delta marks the end of the sequence; any bytes after it are padding.

        The result is cached on the instance once a non-empty set has been computed.

        Returns: A set of VirtualMemoryPointers corresponding to each function's entry point.
                 The set is empty if the binary has no LC_FUNCTION_STARTS command.
        """
        # TODO(PT): move read_uleb somewhere else
        from .dyld_info_parser import DyldInfoParser

        if self._functions_list:
            return self._functions_list

        # Cannot do anything without LC_FUNCTIONS_START
        if not self._function_starts_cmd:
            return set()

        functions_list = set()

        fs_start = self._function_starts_cmd.dataoff
        fs_size = self._function_starts_cmd.datasize
        fs_uleb = self.get_contents_from_address(fs_start, fs_size)

        address = int(self.get_virtual_base())

        idx = 0
        while idx < fs_size:
            address_delta, idx = DyldInfoParser.read_uleb(fs_uleb, idx)
            if address_delta == 0:
                # End-of-list terminator. Stopping here avoids decoding the trailing padding, and avoids
                # recording the virtual base itself as a function when the list is empty.
                break

            address += address_delta
            functions_list.add(VirtualMemoryPointer(address))

        self._functions_list = functions_list
        return self._functions_list
예제 #9
0
    def read_pointer_section(
        self, section_name: str
    ) -> Tuple[List[VirtualMemoryPointer], List[VirtualMemoryPointer]]:
        """Read every pointer stored in the named section.

        The caller must only pass a `section_name` identifying a section which consists purely of a pointer list.

        Two index-matched lists are returned:
        - The first holds the virtual address of each entry in the section.
        - The second holds the pointer value stored at each of those addresses.

        So list1[0] is the virtual address of the section's first pointer, and list2[0] is the value found there.
        Both lists are empty if the section cannot be found.
        """
        locations: List[VirtualMemoryPointer] = []
        entries: List[VirtualMemoryPointer] = []

        # PT: Assume a pointer-list-section will always be in __DATA or __DATA_CONST. True as far as I know.
        # The generator is lazy, so __DATA_CONST is only queried if __DATA does not contain the section.
        candidate_sections = (self.section_with_name(section_name, segment)
                              for segment in ("__DATA", "__DATA_CONST"))
        section = next((found for found in candidate_sections if found), None)
        if section is None:
            # Couldn't find the desired section
            return locations, entries

        base_address = section.address
        raw_data = section.content

        word_type = self.platform_word_type
        word_size = sizeof(word_type)
        # Only whole words are read; a trailing partial word is ignored
        readable_size = (len(raw_data) // word_size) * word_size

        for offset in range(0, readable_size, word_size):
            # convert section offset of entry to absolute virtual address
            locations.append(VirtualMemoryPointer(base_address + offset))

            word_bytes = bytearray(raw_data[offset:offset + word_size])
            entries.append(
                VirtualMemoryPointer(word_type.from_buffer(word_bytes).value))

        return locations, entries
예제 #10
0
 def get_contents_from_address(self, address: int, size: int, is_virtual: bool = False) -> bytearray:
     """Read `size` bytes starting at `address`.

     `address` is treated as a virtual memory address when `is_virtual` is set, and as a static file offset
     otherwise.
     TODO(FS): change all methods that use addresses as ints to the VirtualAddress/StaticAddress class pair to better
      express intent
     """
     if not is_virtual:
         return self.get_bytes(StaticFilePointer(address), size)
     return self.get_content_from_virtual_address(VirtualMemoryPointer(address), size)
예제 #11
0
    def __init__(
        self,
        function_analyzer: "ObjcFunctionAnalyzer",
        instruction: CsInsn,
        patch_msgSend_destination: bool = True,
        container_function_boundary: Tuple[VirtualMemoryPointer,
                                           VirtualMemoryPointer] = None,
    ) -> None:
        """Wrap an unconditional branch instruction and classify its destination.

        After construction:
        - `symbol` is the name of the callable symbol at the destination, or None if there is none
        - `is_external_c_call` is set for branches to imported symbols which are not objc_msgSend variants
        - `is_msgSend_call` is set for branches to an imported symbol listed in OBJC_MSGSEND_FUNCTIONS

        If `container_function_boundary` (start, end) is provided and the destination lies in [start, end),
        the branch is treated as function-local and no symbol lookup is performed.

        Raises ValueError if the instruction's mnemonic is not an unconditional branch mnemonic.
        """
        mnemonic = instruction.mnemonic
        if mnemonic not in ObjcUnconditionalBranchInstruction.UNCONDITIONAL_BRANCH_MNEMONICS:
            raise ValueError(
                f"ObjcUnconditionalBranchInstruction instantiated with"
                f" invalid mnemonic {mnemonic}")

        # an unconditional branch has the destination as the only operand
        branch_target = VirtualMemoryPointer(instruction.operands[0].value.imm)
        super().__init__(instruction, branch_target)

        self.selref: Optional[ObjcSelref] = None
        self.selector: Optional[ObjcSelector] = None

        analyzer = MachoAnalyzer.get_analyzer(function_analyzer.binary)

        # Start from the classification of an anonymous branch; the paths below refine it
        self.symbol = None
        self.is_external_c_call = False
        self.is_msgSend_call = False

        if container_function_boundary:
            if container_function_boundary[
                    0] <= self.destination_address < container_function_boundary[
                        1]:
                # Local basic-block branch within a function
                return

        called_sym = analyzer.callable_symbol_for_address(
            self.destination_address)
        if not called_sym:
            # Branch to an anonymous destination
            # Might be a basic block within a function or some other label
            return

        self.symbol = called_sym.symbol_name
        self.is_external_c_call = called_sym.is_imported

        if not called_sym.is_imported:
            return
        if called_sym.symbol_name not in self.OBJC_MSGSEND_FUNCTIONS:
            return

        # An objc_msgSend variant is reported as a message send rather than as an external C call
        self.is_msgSend_call = True
        self.is_external_c_call = False
        if patch_msgSend_destination:
            self._patch_msgSend_destination(function_analyzer)
예제 #12
0
    def _build_function_boundaries_index(self) -> None:
        """Compute the end-of-function address for each entry point in the binary, and store the results in the DB.

        Each function's basic blocks are computed first; the function's end address is then the largest end address
        among those blocks. One row per function is written to function_boundaries, and one row per basic block is
        written to basic_blocks. Entry points which yield no basic blocks are skipped.
        """
        cursor = self._db_handle.cursor()
        entry_points = sorted(self.get_functions())

        # Computing a function's boundary uses the next entry point address as a hint. For the last entry point in
        # the binary, use the end of its containing section as the hint.
        if entry_points:
            final_entry = entry_points[-1]
            section = self.binary.section_for_address(final_entry)
            assert section is not None and section.end_address >= final_entry
            entry_points.append(VirtualMemoryPointer(section.end_address))

        for entry_point, next_entry_hint in pairwise(entry_points):
            basic_blocks = list(
                self._compute_function_basic_blocks(entry_point,
                                                    next_entry_hint))
            # If we found a function with no code, just skip it
            # This can happen in the assembly unit tests, where we insert a jump to a dummy __text label
            if not basic_blocks:
                continue

            # The function ends where its furthest-reaching basic block ends
            function_end = VirtualMemoryPointer(
                max(block_end for _, block_end in basic_blocks))
            cursor.execute(
                "INSERT INTO function_boundaries (entry_point, end_address) VALUES (?, ?)",
                (entry_point, function_end))
            cursor.executemany(
                "INSERT INTO basic_blocks VALUES (?, ?, ?)",
                [(entry_point, block_start, block_end)
                 for block_start, block_end in basic_blocks])

        # Using the connection as a context manager commits the pending inserts when the block exits
        with self._db_handle:
            cursor.close()
예제 #13
0
 def strings_in_func(
     self, func_addr: VirtualMemoryPointer
 ) -> List[Tuple[VirtualMemoryPointer, str]]:
     """Fetch the list of strings referenced by the provided function.

     Returns a list of (instruction that completes the string load, loaded string literal) tuples, one per row
     of the string_xrefs table whose accessor_func_start_address column equals `func_addr`. The list is empty
     when no row matches.
     """
     cursor = self._db_handle.cursor()
     try:
         rows: List[Tuple[int, str]] = cursor.execute(
             "SELECT accessor_address, string_literal from string_xrefs WHERE accessor_func_start_address=?",
             (func_addr, ),
         ).fetchall()
     finally:
         # Release the cursor as soon as the rows have been materialised, even if the query raised
         cursor.close()

     return [(VirtualMemoryPointer(accessor), literal)
             for accessor, literal in rows]
예제 #14
0
    def _parse_stub_from_instructions(instr1: CsInsn, instr2: CsInsn,
                                      instr3: CsInsn) -> MachoImpStub:
        """Build a MachoImpStub from the three instructions which make up an import stub.

        Each stub follows one of two patterns:
        - pattern 1: nop / ldr x16, <sym> / br x16
        - pattern 2: adrp x16, <page> / ldr x16, [x16 <offset>] / br x16

        The stub's address is the address of the first instruction. Its destination is the immediate of the `ldr`
        (pattern 1), or the `adrp` page plus the `ldr` displacement (pattern 2).

        Raises:
            NotImplementedError: if the first instruction matches neither pattern.
            RuntimeError: if a later instruction does not match the pattern selected by the first instruction.
        """
        # TODO(PT): write CsInsn by hand to test this function
        patterns = [["nop", "ldr", "br"], ["adrp", "ldr", "br"]]
        # differentiate between patterns by looking at the opcode of the first instruction
        if instr1.mnemonic == patterns[0][0]:
            pattern_idx = 0
        elif instr1.mnemonic == patterns[1][0]:
            pattern_idx = 1
        else:
            # unknown stub format
            raise NotImplementedError(
                f"Unknown stub format at {hex(instr1.address)}")

        # sanity check: every instruction must carry the mnemonic the selected pattern expects
        expected_ops = patterns[pattern_idx]
        for idx, (op, expected_mnemonic) in enumerate(
                zip([instr1, instr2, instr3], expected_ops)):
            if op.mnemonic != expected_mnemonic:
                raise RuntimeError(
                    f"Expected instr {hex(op.address)} (idx {idx}) to be {expected_mnemonic}"
                    f" while parsing stub, was instead {op.mnemonic}")

        stub_addr = instr1.address
        if pattern_idx == 0:
            # nop/ldr/br pattern: the ldr names the destination directly
            stub_dest = instr2.operands[1].value.imm
        else:
            # adrp/ldr/br pattern: the destination is the adrp page plus the ldr's displacement
            stub_dest_page = instr1.operands[1].value.imm
            stub_dest_pageoff = instr2.operands[1].mem.disp
            stub_dest = stub_dest_page + stub_dest_pageoff

        return MachoImpStub(VirtualMemoryPointer(stub_addr),
                            VirtualMemoryPointer(stub_dest))
예제 #15
0
    def get_function_end_address(
            self, entry_point: VirtualMemoryPointer
    ) -> Optional[VirtualMemoryPointer]:
        """Look up the end address stored in function_boundaries for the function starting at `entry_point`.

        Returns None if no row exists for the provided entry point.
        """
        query = "SELECT end_address FROM function_boundaries WHERE entry_point = ?"
        with closing(self._db_handle.execute(query, (entry_point, ))) as rows:
            # Only the first matching row is consulted
            row = rows.fetchone()

        return None if row is None else VirtualMemoryPointer(row[0])
예제 #16
0
    def get_virtual_base(self) -> VirtualMemoryPointer:
        """Retrieve the first virtual address of the Mach-O slice

        The value is the `vmaddr` of the __TEXT segment. It is computed on first use and then stored on the instance.

        Returns:
            VirtualMemoryPointer containing the virtual memory space address that the Mach-O slice requests to
            begin at

        Raises:
            RuntimeError: if the binary has no __TEXT segment

        """
        if self._virtual_base:
            # Already computed by an earlier call
            return self._virtual_base

        text_seg = self.segment_with_name("__TEXT")
        if not text_seg:
            raise RuntimeError("Could not find virtual base because binary has no __TEXT segment.")

        self._virtual_base = VirtualMemoryPointer(text_seg.vmaddr)
        return self._virtual_base
예제 #17
0
    def read_selectors_from_methlist_ptr(
            self, methlist_ptr: VirtualMemoryPointer) -> List[ObjcSelector]:
        """Parse the method list at the virtual address `methlist_ptr` into a List of ObjcSelectors.

        Every parsed selector which has a known selref is also recorded in the selref pointer -> selector map.
        Raises ValueError if the name string of a method cannot be read.
        """
        list_header = self.binary.read_struct(methlist_ptr,
                                              ObjcMethodListStruct,
                                              virtual=True)
        parsed_selectors: List[ObjcSelector] = []

        # the first method entry appears directly after the ObjcMethodListStruct
        entry_addr = methlist_ptr + list_header.sizeof
        for _ in range(list_header.methcount):
            method = ObjcMethodStruct.read_method_struct(
                self.binary, entry_addr, methlist_flags=list_header.flags)
            # Byte-align IMP, as the lower bits are used for flags
            method.implementation &= ~0x3  # type: ignore

            selector_name = self.binary.get_full_string_from_start_address(
                method.name)
            if not selector_name:
                raise ValueError(
                    f"Could not get symbol name for {method.name}")

            # attempt to find corresponding selref
            selref = self._selector_literal_ptr_to_selref_map.get(method.name)
            selector = ObjcSelector(
                selector_name, selref,
                VirtualMemoryPointer(method.implementation))
            parsed_selectors.append(selector)

            if selref:
                # The selref may already have been recorded by an earlier parsing step which did not know the
                # implementation address. It is also possible that the selref is an external method, which
                # never has a local implementation.
                selref_addr = selref.source_address
                if selref_addr in self._selref_ptr_to_selector_map:
                    if not self._selref_ptr_to_selector_map[
                            selref_addr].implementation:
                        # drop the implementation-less entry; it is re-added just below
                        del self._selref_ptr_to_selector_map[selref_addr]
                self._selref_ptr_to_selector_map[selref_addr] = selector

            entry_addr += method.sizeof
        return parsed_selectors
예제 #18
0
    def test_ios14_relative_method_lists(self):
        """Check selref -> selector parsing against the iOS 14 relative-method-list fixture binary."""
        # Given a binary compiled with a minimum deployment target of iOS 14
        binary = MachoParser(
            TestObjcRuntimeDataParser.IOS14_RELATIVE_METHOD_LIST_BIN_PATH
        ).get_arm64_slice()
        binary.get_minimum_deployment_target()

        # When the Objective C methods within the binary are parsed
        objc_parser = ObjcRuntimeDataParser(binary, DyldInfoParser(binary))
        selref_selector_map = objc_parser.selrefs_to_selectors()

        # Then the method structures are correctly parsed
        assert len(selref_selector_map) == 7

        # Each row: (selref address, expected implementation, defined externally?, selector name)
        expected_selectors = [
            (0x10000C0E0, None, True,
             "evaluateJavaScript:inFrame:inContentWorld:completionHandler:"),
            (0x10000C0B0, VirtualMemoryPointer(0x100007BFC), False,
             "usesWebView"),
        ]
        for selref_addr, expected_imp, expected_external, expected_name in expected_selectors:
            selector = selref_selector_map[VirtualMemoryPointer(selref_addr)]
            if expected_imp is None:
                assert selector.implementation is None
            else:
                assert selector.implementation == expected_imp
            assert selector.is_external_definition is expected_external
            assert selector.name == expected_name
예제 #19
0
    def read_word(self, address: int, virtual: bool = True, word_type: Any = None) -> int:
        """Read a single word from the binary and return its integer value.

        `address` is treated as a virtual address when `virtual` is set, and as a static file offset otherwise.
        `word_type` is the ctypes-style type to decode; it defaults to the binary's platform word type.
        Raises InvalidAddressError if no bytes could be read from the address.
        """
        word_type = word_type or self.platform_word_type
        word_size = sizeof(word_type)

        if virtual:
            raw_word = self.get_content_from_virtual_address(VirtualMemoryPointer(address), word_size)
        else:
            raw_word = self.get_bytes(StaticFilePointer(address), word_size)

        if not raw_word:
            raise InvalidAddressError(f"Could not read word at address {hex(address)}")

        # from_buffer() needs a writable buffer, hence the bytearray copy
        return word_type.from_buffer(bytearray(raw_word)).value
예제 #20
0
 def _protolist_ptr_to_protocol_ptr_list(
         self,
         protolist_ptr: VirtualMemoryPointer) -> List[VirtualMemoryPointer]:
     """Accepts the virtual address of an ObjcProtocolListStruct, and returns the protocol pointers it lists.

     The list holds `count` word-sized pointers, stored back-to-back right after the structure.
     """
     protolist = self.binary.read_struct(protolist_ptr,
                                         ObjcProtocolListStruct,
                                         virtual=True)
     # pointers start directly after the 'count' field
     first_pointer_addr = protolist.binary_offset + protolist.sizeof
     return [
         VirtualMemoryPointer(
             self.binary.read_word(
                 first_pointer_addr +
                 index * sizeof(self.binary.platform_word_type)))
         for index in range(protolist.count)
     ]
예제 #21
0
    def _parse_objc_classes(self) -> List[ObjcClass]:
        """Read Objective-C class data in __objc_classlist, __objc_data to get classes and selectors in binary

        For every classlist pointer which resolves to a class structure, the class and its metaclass are parsed
        and merged into a single ObjcClass, which is also recorded in the classref -> ObjcClass map.
        Raises RuntimeError if neither the class nor its metaclass could be parsed.
        """
        logging.debug(
            "Cross-referencing __objc_classlist, __objc_class, and __objc_data entries..."
        )
        parsed_objc_classes = []
        for ptr in self._get_classlist_pointers():
            objc_class = self._get_objc_class_from_classlist_pointer(ptr)
            if not objc_class:
                continue

            # the class's associated struct __objc_data contains the instance method list
            parsed_class = None
            class_data = self._get_objc_data_from_objc_class(objc_class)
            if class_data:
                parsed_class = self._parse_objc_data_entry(
                    objc_class, class_data)

            # parse the metaclass if it exists
            # the class stores instance methods and the metaclass's method list contains class methods
            # the metaclass has the same name as the actual class
            metaclass = self._get_objc_class_from_classlist_pointer(
                VirtualMemoryPointer(objc_class.metaclass))
            metaclass_data = self._get_objc_data_from_objc_class(
                metaclass) if metaclass else None
            if metaclass_data:
                parsed_metaclass = self._parse_objc_data_entry(
                    objc_class, metaclass_data)
                if parsed_class:
                    # add in selectors from the metaclass to the real class
                    parsed_class.selectors += parsed_metaclass.selectors
                else:
                    # no base class found, set the base class to the metaclass
                    parsed_class = parsed_metaclass

            # sanity check: ensure we either found a class or metaclass
            if not parsed_class:
                raise RuntimeError(f"Failed to parse classref {hex(ptr)}")

            parsed_objc_classes.append(parsed_class)
            self._classrefs_to_objc_classes[ptr] = parsed_class

        return parsed_objc_classes
예제 #22
0
    def write_bytes(self, data: bytes, address: int, virtual: bool = False) -> "MachoBinary":
        """Return a new MachoBinary whose contents equal this binary's, with `data` written over `address`.

        `address` is treated as a virtual address when `virtual` is set, and as a file offset otherwise.
        This binary's own cached contents are left untouched.
        Note: This will invalidate the binary's code signature, if present.
        """
        # Ensure there is valid data in this address region by trying to read from it.
        # If this does not throw an exception, the provided address range is valid.
        self.get_contents_from_address(address, len(data), virtual)

        if virtual:
            file_offset = self.file_offset_for_virtual_address(VirtualMemoryPointer(address))
        else:
            file_offset = address

        # Patch a private copy of the binary's bytes
        patched_contents = bytearray(self._cached_binary)
        patched_contents[file_offset : file_offset + len(data)] = data

        return MachoBinary(self.path, patched_contents)
예제 #23
0
    def callable_symbol_for_symbol_name(
            self, symbol_name: str) -> Optional[CallableSymbol]:
        """Retrieve information about a name within the imported or exported symbols tables.
        It's the caller's responsibility to provide a valid callable symbol name.

        Returns None if the named_callable_symbols table has no row for `symbol_name`.
        Exactly one row is expected per name; this is enforced with an assertion.
        """
        cursor = self._db_handle.cursor()
        try:
            symbols = cursor.execute(
                "SELECT * from named_callable_symbols WHERE symbol_name=?",
                (symbol_name, )).fetchall()
        finally:
            # Release the cursor as soon as the rows have been materialised, even if the query raised
            cursor.close()

        if not symbols:
            return None
        assert len(
            symbols) == 1, f"Found more than 1 symbol named {symbol_name}?"
        symbol_data = symbols[0]

        # The first three columns are read as (is_imported, address, symbol_name)
        return CallableSymbol(is_imported=bool(symbol_data[0]),
                              address=VirtualMemoryPointer(symbol_data[1]),
                              symbol_name=symbol_data[2])
예제 #24
0
    def __init__(self, function_analyzer: "ObjcFunctionAnalyzer",
                 instruction: CsInsn) -> None:
        """Wrap a conditional branch instruction, extracting its destination address from the operands.

        Raises ValueError if the mnemonic is not a conditional branch mnemonic, or if it is in neither the
        single-operand nor the double-operand mnemonic group.
        """
        mnemonic = instruction.mnemonic
        if mnemonic not in ObjcConditionalBranchInstruction.CONDITIONAL_BRANCH_MNEMONICS:
            raise ValueError(
                f"ObjcConditionalBranchInstruction instantiated with"
                f" invalid mnemonic {mnemonic}")

        # The destination follows the operand(s) being tested, so its index depends on the mnemonic group
        if mnemonic in ObjcConditionalBranchInstruction.SINGLE_OP_MNEMONICS:
            dest_op_idx = 1
        elif mnemonic in ObjcConditionalBranchInstruction.DOUBLE_OP_MNEMONICS:
            dest_op_idx = 2
        else:
            raise ValueError(f"Unknown conditional mnemonic {mnemonic}")

        destination = VirtualMemoryPointer(
            instruction.operands[dest_op_idx].value.imm)
        ObjcBranchInstruction.__init__(self, instruction, destination)
예제 #25
0
    def callable_symbol_for_address(
            self, branch_destination: VirtualMemoryPointer
    ) -> Optional[CallableSymbol]:
        """Retrieve information about a callable branch destination.
        It's the caller's responsibility to provide a valid branch destination with a symbol associated with it.

        Returns None if the named_callable_symbols table has no row for `branch_destination`.
        Exactly one row is expected per address; this is enforced with an assertion.
        """
        cursor = self._db_handle.cursor()
        try:
            symbols = cursor.execute(
                "SELECT * from named_callable_symbols WHERE address=?",
                (branch_destination, )).fetchall()
        finally:
            # Release the cursor as soon as the rows have been materialised, even if the query raised
            cursor.close()

        if not symbols:
            return None
        assert len(
            symbols) == 1, f"Found more than 1 symbol at {branch_destination}?"
        symbol_data = symbols[0]

        # The first three columns are read as (is_imported, address, symbol_name)
        return CallableSymbol(is_imported=bool(symbol_data[0]),
                              address=VirtualMemoryPointer(symbol_data[1]),
                              symbol_name=symbol_data[2])
예제 #26
0
    def class_name_for_class_pointer(
            self, classref: VirtualMemoryPointer) -> Optional[str]:
        """Given a classref, return the name of the class.
        This method will handle classes implemented within the binary and imported classes.

        `classref` may be the address of an imported-class binding, the direct address of a local class
        structure, or a pointer (such as an __objc_classrefs entry) which holds the address of a local class
        structure. Returns None if none of these interpretations yields a class.
        """
        # Did the caller provide a classref for an imported class?
        if classref in self.imported_symbols_to_symbol_names:
            return self.imported_symbols_to_symbol_names[classref]

        def local_classes_at(struct_addr):
            # Every locally implemented class whose structure lives at the provided address
            return [
                candidate for candidate in self.objc_classes()
                if candidate.raw_struct.binary_offset == struct_addr
            ]

        # The class is implemented within the binary and has an associated ObjcClass object.
        # First, check if we were provided with the direct address of the structure representing the class.
        direct_matches = local_classes_at(classref)
        if direct_matches:
            assert len(direct_matches) == 1
            return direct_matches[0].name

        # Then, check if we were passed a classref pointer in __objc_classrefs
        try:
            dereferenced_classref = VirtualMemoryPointer(
                self.binary.read_word(classref))
        except InvalidAddressError:
            # Invalid classref
            return None

        indirect_matches = local_classes_at(dereferenced_classref)
        if indirect_matches:
            assert len(indirect_matches) == 1
            return indirect_matches[0].name

        # Invalid classref
        return None
예제 #27
0
    def get_full_string_from_start_address(self,
                                           start_address: int,
                                           virtual: bool = True
                                           ) -> Optional[str]:
        """Read the NULL-terminated string that begins at start_address.

        Content is fetched in chunks: 16 bytes first, with the chunk size doubling
        each time a chunk contains no NULL byte. The bytes before the terminator
        are then decoded with the default codec (UTF-8).

        Args:
            start_address: Where the string begins.
            virtual: If True, start_address is read as a virtual address;
                otherwise it is read as a file offset.

        Returns:
            The decoded string, or None if the bytes are not valid UTF-8
            (the address may not actually point to a string).
        """
        chunk_size = 16
        collected_bytes = []

        while True:
            if virtual:
                chunk = self.get_content_from_virtual_address(
                    VirtualMemoryPointer(start_address), chunk_size)
            else:
                chunk = self.get_bytes(StaticFilePointer(start_address),
                                       chunk_size)

            for byte in chunk:
                if byte == 0x00:
                    # Reached the terminator; everything before it is the string
                    break
                collected_bytes.append(byte)
            else:
                # No terminator in this chunk: step past what was just read
                # and fetch a chunk twice as large next time
                start_address += chunk_size
                chunk_size *= 2
                continue

            try:
                return bytes(collected_bytes).decode()
            except UnicodeDecodeError:
                # Undecodable content suggests the address does not point to a string
                return None
예제 #28
0
    def _patch_msgSend_destination(
            self, function_analyzer: "ObjcFunctionAnalyzer") -> None:
        """Re-target this objc_msgSend call at the implementation of the messaged selector.

        After patching, destination_address is deliberately *not* the literal branch
        target of the instruction (which is the objc_msgSend stub). Knowing which
        implementation the selector corresponds to is the more valuable information,
        so the address of that implementation is stored instead when it is known.

        Also sets is_external_objc_call, and on success selref and selector.

        Raises:
            ValueError: If this instruction is not a `bl`/`b` to one of the
                OBJC_MSGSEND_FUNCTIONS flagged as a msgSend call.
        """
        is_patchable_call = (self.is_msgSend_call
                             and self.raw_instr.mnemonic in ("bl", "b")
                             and self.symbol in self.OBJC_MSGSEND_FUNCTIONS)
        if not is_patchable_call:
            formatted_instruction = function_analyzer.format_instruction(
                self.raw_instr)
            raise ValueError(
                "cannot parse objc_msgSend destination on non-msgSend instruction"
                f" {formatted_instruction}")

        try:
            selref_pointer = function_analyzer.get_objc_selref(self)
            resolved_selector = function_analyzer.macho_analyzer.selector_for_selref(
                selref_pointer)
            if not resolved_selector:
                raise RuntimeError(
                    f"Couldn't get sel for selref ptr {selref_pointer}")

            # A selector without an implementation in this binary is defined by an outside class
            self.is_external_objc_call = resolved_selector.is_external_definition

            # Leave destination_address pointing at the msgSend stub unless
            # the implementation lives in this binary
            implementation = resolved_selector.implementation
            if implementation:
                self.destination_address = implementation

            self.selref = resolved_selector.selref
            self.selector = resolved_selector
        except RuntimeError:
            # TODO(PT): Should this ever be hit?
            # Fall back to treating the call as external with a null destination
            self.is_external_objc_call = True
            self.destination_address = VirtualMemoryPointer(0)
예제 #29
0
    def __init__(self, instruction: CsInsn) -> None:
        """Wrap a disassembled instruction and initialise its call metadata.

        Args:
            instruction: The disassembled instruction to wrap.
        """
        # Keep the raw instruction, and expose its address as a VirtualMemoryPointer
        self.raw_instr = instruction
        self.address = VirtualMemoryPointer(instruction.address)

        # Starts out as "not a msgSend call, no symbol"
        self.is_msgSend_call: bool = False
        self.symbol: Optional[str] = None
예제 #30
0
 def _objc_msgSend_addr(self) -> Optional[VirtualMemoryPointer]:
     """Return the address of the `_objc_msgSend` callable symbol, or None if the lookup finds nothing."""
     msgsend_symbol = self.callable_symbol_for_symbol_name("_objc_msgSend")
     if msgsend_symbol:
         return VirtualMemoryPointer(msgsend_symbol.address)
     return None