Beispiel #1
0
    def __init__(self,
                 binary: MachoBinary,
                 instructions: List[CsInsn],
                 method_info: Optional[ObjcMethodInfo] = None) -> None:
        """Set up analysis state for a run of disassembled instructions.

        Args:
            binary: The binary being analyzed; also used to fetch the MachoAnalyzer for it.
            instructions: The instructions making up this function. May be empty, in which case
                both the start and end address are set to 0.
            method_info: Objective-C method metadata for this function, or None if there is none.
        """
        # Imported inside the method rather than at module level
        # NOTE(review): presumably to avoid a circular import -- confirm before hoisting
        from strongarm.macho import MachoAnalyzer

        try:
            self.start_address = VirtualMemoryPointer(instructions[0].address)
            # The end-address is right-exclusive: it points one instruction past the last one
            self.end_address = VirtualMemoryPointer(instructions[-1].address) + MachoBinary.BYTES_PER_INSTRUCTION
        except IndexError:
            # This method must have just been a stub with no real instructions!
            self.start_address = VirtualMemoryPointer(0)
            self.end_address = VirtualMemoryPointer(0)

        self.binary = binary
        self.macho_analyzer = MachoAnalyzer.get_analyzer(binary)
        self.instructions = instructions
        self.method_info = method_info

        # Starts out as None, i.e. no call targets have been stored yet
        self._call_targets: Optional[List[ObjcBranchInstruction]] = None

        # Find basic-block-boundaries upfront
        self.basic_blocks = self._find_basic_blocks()
 def test_parses_dsc_images(self, dyld_shared_cache: DyldSharedCacheParser) -> None:
     """The shared cache reports the expected number of embedded images and a known image's range."""
     embedded_images = dyld_shared_cache.embedded_binary_info
     assert len(embedded_images) == 1400

     # Spot-check one image: the pair of pointers recorded for it must match the known values
     core_foundation = Path("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation")
     reported_range = embedded_images[core_foundation]
     expected_range = (VirtualMemoryPointer(0x180DB9000), VirtualMemoryPointer(0x18111D000))
     assert reported_range == expected_range
Beispiel #3
0
    def test_translate_virtual_address(self) -> None:
        """Check reading content by virtual address and mapping a virtual address to a file offset."""
        # Reading at this virtual address should yield the NUL-terminated selector string
        expected_bytes = b"application:openURL:sourceApplication:annotation:\x00"
        string_address = VirtualMemoryPointer(0x100006DB8)
        actual_bytes = self.binary.get_content_from_virtual_address(
            virtual_address=string_address, size=len(expected_bytes)
        )
        assert actual_bytes == expected_bytes

        # An address before the end of the load commands should map to this file offset
        load_commands_address = VirtualMemoryPointer(0x100000AD0)
        assert self.binary.file_offset_for_virtual_address(load_commands_address) == 0xAD0
    def test_get_symbol_name_objc(self) -> None:
        """An analyzer constructed with ObjC method info names itself in ``-[Class selector]`` form."""
        # Build a minimal selector / class / method-info chain; the pointers are all placeholders
        selref = ObjcSelref(VirtualMemoryPointer(0), VirtualMemoryPointer(0), "testMethod:")
        sel = ObjcSelector("testMethod:", selref, VirtualMemoryPointer(0))
        method_info = ObjcMethodInfo(ObjcClass({}, "TestClass", [sel]), sel, VirtualMemoryPointer(0))  # type: ignore

        function_analyzer = ObjcFunctionAnalyzer(self.binary, self.instructions, method_info)
        assert function_analyzer.get_symbol_name() == "-[TestClass testMethod:]"
Beispiel #5
0
def main():
    """Write a CSV of the exported symbols of every image in a dyld_shared_cache.

    Command-line arguments:
        dyld_shared_cache_path: Path to the dyld_shared_cache which should be symbolicated.
        output_csv_path: Path the CSV is written to.

    Each CSV row is (symbol address, symbol name, path of the image exporting it).
    An image that fails to process is logged together with its traceback and skipped,
    so one bad image does not abort the whole run.
    """
    logging.basicConfig(level=logging.INFO)

    arg_parser = argparse.ArgumentParser(description="dyld_shared_cache symbol map generator")
    arg_parser.add_argument(
        "dyld_shared_cache_path", type=str, help="Path to the dyld_shared_cache which should be symbolicated"
    )
    arg_parser.add_argument("output_csv_path", type=str, help="Output CSV path")
    args = arg_parser.parse_args()

    dyld_shared_cache = DyldSharedCacheParser(Path(args.dyld_shared_cache_path))
    symbols: List[Tuple[VirtualMemoryPointer, str, Path]] = []

    # Iterate each image in the DSC, extract it, and record its symbols
    image_count = len(dyld_shared_cache.embedded_binary_info)
    for image_number, path in enumerate(dyld_shared_cache.embedded_binary_info, start=1):
        # The DSC has more than 1,000 binaries, so try to free up resources after each image
        MachoAnalyzer.clear_cache()

        logging.info(f"({image_number}/{image_count}) Symbolicating {path}...")
        try:
            binary = dyld_shared_cache.get_embedded_binary(path)
            analyzer = MachoAnalyzer.get_analyzer(binary)
            for sym, addr in analyzer.exported_symbol_names_to_pointers.items():
                symbols.append((VirtualMemoryPointer(addr), sym, path))
        except Exception:
            # Best-effort: keep going, but record the traceback so the failure can be diagnosed
            logging.exception(f"Failed to symbolicate {path}")

    # newline="" lets the csv module control line endings itself
    with open(args.output_csv_path, "w", newline="") as output_csv:
        csv_writer = csv.writer(output_csv, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(symbols)
 def test_get_symbol_for_address(self) -> None:
     """Looking up a symbol name by address returns the expected symbol."""
     # Given the address of a known symbol
     header_address = VirtualMemoryPointer(0x100000000)
     # When I look up the symbol name for that address
     resolved_name = self.string_helper.get_symbol_name_for_address(header_address)
     # Then the expected name is returned
     assert resolved_name == "__mh_execute_header"
Beispiel #7
0
    def disasm_f(self, args: List[str]) -> None:
        """Print the output of disassemble_function() for the address given in args[0].

        Args:
            args: Command arguments. args[0] is parsed as a base-16 virtual address.

        With no arguments, or with an argument that is not valid hexadecimal, the problem is
        reported and the usage line is printed instead of raising.
        """
        usage = "Usage: disasm [sel]"
        if not args:
            print(usage)
            return

        # Parse the address up front so malformed input is reported, matching dump_memory's behavior
        try:
            address = int(args[0], 16)
        except ValueError as e:
            print(f"Failed to interpret address: {e}")
            print(usage)
            return

        disassembled_str = disassemble_function(self.binary, VirtualMemoryPointer(address))
        print(disassembled_str)
Beispiel #8
0
    def test_identify_imported_symbols_2(self) -> None:
        """Imported symbols and the classref lookup resolve correctly for the second test binary."""
        binary = MachoParser(TestDyldInfoParser.BINARY2_PATH).get_arm64_slice()
        assert binary
        analyzer = MachoAnalyzer.get_analyzer(binary)

        # TestBinary4's dyld binding opcodes utilize BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB,
        # which previously had a bug where we didn't increment the data pointer after binding
        class_symbol = "_OBJC_CLASS_$_UIAlertView"
        imported_symbols = analyzer.imported_symbols_to_symbol_names

        # The symbol is bound inside an ObjC category in __objc_const...
        category_binding = VirtualMemoryPointer(0x100212338)
        assert imported_symbols[category_binding] == class_symbol

        # ...and again as a classref in __objc_classrefs
        classref = VirtualMemoryPointer(0x10026AE40)
        assert imported_symbols[classref] == class_symbol

        # This API should return the classref, not the bound class in the category definition
        assert analyzer.classref_for_class_name(class_symbol) == classref
 def test_find_image_for_code_address(self, dyld_shared_cache: DyldSharedCacheParser) -> None:
     """A text address inside an embedded image is attributed to that image's path."""
     # Given an address within an embedded image
     text_address = VirtualMemoryPointer(0x180AC1000)
     expected_image = Path("/usr/lib/system/libsystem_malloc.dylib")
     # When I ask which image contains it, the correct image is returned
     assert dyld_shared_cache.image_for_text_address(text_address) == expected_image
 def test_three_op_add(self) -> None:
     """Register contents are resolved across an adrp followed by a three-operand add."""
     # 0x000000010000665c         adrp       x0, #0x102a41000
     # 0x0000000100006660         add        x0, x0, #0x458
     # 0x0000000100006664         bl         0x101f8600c
     binary_path = pathlib.Path(__file__).parent / "bin" / "ThreeOpAddInstruction"
     binary = MachoParser(binary_path).get_arm64_slice()
     assert binary
     analyzer = MachoAnalyzer.get_analyzer(binary)

     # Analyze the function starting at the adrp, then inspect x0 at the bl
     function_start = VirtualMemoryPointer(0x10000665C)
     function_analyzer = ObjcFunctionAnalyzer(binary, analyzer.get_function_instructions(function_start))
     raw_branch = function_analyzer.get_instruction_at_address(VirtualMemoryPointer(0x100006664))
     branch = ObjcInstruction.parse_instruction(function_analyzer, raw_branch)

     x0_contents = function_analyzer.get_register_contents_at_instruction("x0", branch)
     assert x0_contents.type == RegisterContentsType.IMMEDIATE
     assert x0_contents.value == 0x102A41458
    def test_get_register_contents_at_instruction_same_reg(self) -> None:
        """Dataflow where one register gets an immediate and then a 'data link' from that same register.

        Related ticket: SCAN-577-dataflow-fix
        """
        binary = MachoParser(TestFunctionAnalyzer.TEST_BINARY_PATH).get_arm64_slice()
        assert binary

        function_analyzer = ObjcFunctionAnalyzer.get_function_analyzer_for_signature(
            binary, "AppDelegate", "application:didFinishLaunchingWithOptions:"
        )

        # Each case gives the address of the second instruction of a pair that reuses one register,
        # the register to query at that instruction, and the value it is expected to hold.
        cases = [
            # 0x000000010000428c    adrp       x1, #0x10011a000
            # 0x0000000100004290    add        x1, x1, #0x9c8
            (0x100004290, "x1", 0x10011A9C8),
            # 0x0000000100004744    adrp       x8, #0x100115000
            # 0x0000000100004748    ldr        x8, [x8, #0x60]
            (0x100004748, "x8", 0x100115060),
        ]
        for address, register, expected_value in cases:
            # Given the instruction at that address
            instruction = ObjcInstruction.parse_instruction(
                function_analyzer,
                function_analyzer.get_instruction_at_address(VirtualMemoryPointer(address)),
            )
            # If I ask for the contents of the register
            contents = function_analyzer.get_register_contents_at_instruction(register, instruction)
            # Then I get the correct value
            assert contents.type == RegisterContentsType.IMMEDIATE
            assert contents.value == expected_value
Beispiel #12
0
    def test_find_method_code(self) -> None:
        """The IMP for a selector covers the expected address range and instruction count."""
        # Expected bounds were found in Hopper; the end address is one instruction past the last one
        expected_start = VirtualMemoryPointer(0x1000066DC)
        expected_end = VirtualMemoryPointer(0x1000066E4)
        instruction_size = MachoBinary.BYTES_PER_INSTRUCTION

        imp = self.analyzer.get_imps_for_sel("application:didFinishLaunchingWithOptions:")[0]
        assert imp.start_address == expected_start
        assert imp.end_address == expected_end

        # The disassembled instructions must span exactly the same range
        instructions = self.analyzer.get_function_instructions(expected_start)
        first_instruction, last_instruction = instructions[0], instructions[-1]
        assert first_instruction.address == expected_start
        assert last_instruction.address == expected_end - instruction_size

        expected_count = int((expected_end - expected_start) / instruction_size)
        assert len(instructions) == expected_count
Beispiel #13
0
    def dump_memory(self, args: List[str]) -> None:
        """Print a hex and ASCII dump of content read from the binary at a virtual address.

        Args:
            args: [size, virtual address]. The size is parsed as decimal, the address as hexadecimal.

        With too few arguments, or arguments that do not parse, the usage line is printed instead.
        """
        usage = "Usage: dump [size] [virtual address]"
        if len(args) < 2:
            print(usage)
            return

        try:
            dump_size, address = int(args[0], 10), int(args[1], 16)
        except ValueError as e:
            print(f"Failed to interpret address: {e}")
            print(usage)
            return

        binary_data = self.binary.get_content_from_virtual_address(VirtualMemoryPointer(address), dump_size)

        # One output row per 16-byte region
        region_size = 16
        for offset in range(0, dump_size, region_size):
            row = binary_data[offset:offset + region_size]

            # Row header: the address of the first byte in this row
            print(hex(address + offset), end="\t\t")

            for position, byte in enumerate(row, start=1):
                print(f"{byte:02x}", end=" ")
                # Extra gap after every 8th byte
                if position % 8 == 0:
                    print("\t", end="")

            # ASCII column: bytes outside 32..126 are shown as '.'
            printable = "".join(chr(byte) if 32 <= byte < 127 else "." for byte in row)
            print(f"|{printable}|")
Beispiel #14
0
    def get_objc_selref(
        self, msgsend_instr: ObjcUnconditionalBranchInstruction
    ) -> VirtualMemoryPointer:
        """Return the selref pointer held in x1 at an _objc_msgSend call site.

        When _objc_msgSend is called, x1 contains the selref being messaged.
        The caller is responsible for ensuring this is called at an _objc_msgSend call site.

        Raises:
            ValueError: If the instruction's mnemonic is not an unconditional branch mnemonic.
            RuntimeError: If the contents of x1 at the call site are not an immediate.
        """
        mnemonic = msgsend_instr.raw_instr.mnemonic
        if mnemonic not in ObjcUnconditionalBranchInstruction.UNCONDITIONAL_BRANCH_MNEMONICS:
            raise ValueError("get_objc_selref() called on non-branch instruction")

        # At an _objc_msgSend call site, the selref is in x1
        x1_contents = self.get_register_contents_at_instruction("x1", msgsend_instr)
        if x1_contents.type == RegisterContentsType.IMMEDIATE:
            return VirtualMemoryPointer(x1_contents.value)

        raise RuntimeError(
            f"could not determine selref ptr, origates in function arg (type {x1_contents.type.name})"
        )
Beispiel #15
0
    def get_symbol_name(self) -> str:
        """Return a name for this block of code.

        The result is ``-[Class selector]`` when Objective-C method info is available, otherwise
        the symbol name found for the start address (demangled if it is a mangled C++ symbol),
        otherwise the fixed placeholder ``_unsymbolicated_function``.
        """
        info = self.method_info
        if info:
            return f"-[{info.objc_class.name} {info.objc_sel.name}]"

        # Not an Objective-C method. Try to find a symbol name that matches the address
        crossref_helper = self.macho_analyzer.crossref_helper
        symbol_name = crossref_helper.get_symbol_name_for_address(VirtualMemoryPointer(self.start_address))
        if not symbol_name:
            # Fallback
            # We don't want to format the procedure as sub_<address>, because we use the output of this method to
            # report code locations, and the address of the same procedure might change between subsequent binary
            # builds.
            return "_unsymbolicated_function"

        # Demangle C++ symbols when applicable
        if _is_mangled_cpp_symbol(symbol_name):
            return _demangle_cpp_symbol(symbol_name)
        return symbol_name
Beispiel #16
0
def annotate_instruction(function_analyzer: ObjcFunctionAnalyzer,
                         sel_args: List[str], instr: CsInsn) -> str:
    """Build the trailing annotation text for one disassembled instruction.

    Branch instructions are annotated with their destination: a local jump label, a symbol
    (with selector and argument registers when a selector is known), or the contents of the
    first four registers when no symbol is known. Other instructions are annotated with a
    string only if they write a general-purpose register whose resolved contents point at one.

    Args:
        function_analyzer: Analyzer for the function containing the instruction.
        sel_args: Not used by this function.
        instr: The raw instruction to annotate.

    Returns:
        The annotation text, which always starts with two tab characters.
    """

    def _render_register(register_name: str, at_instr) -> str:
        # Hex value if the register holds a known immediate at that instruction, else a placeholder
        contents = function_analyzer.get_register_contents_at_instruction(register_name, at_instr)
        if contents.type == RegisterContentsType.IMMEDIATE:
            return hex(contents.value)
        return "<?>"

    annotation = "\t\t"
    # Parse as an ObjcInstruction
    parsed = ObjcInstruction.parse_instruction(
        function_analyzer, function_analyzer.get_instruction_at_address(instr.address)
    )

    if isinstance(parsed, ObjcBranchInstruction):
        branch: ObjcBranchInstruction = parsed
        annotation += "#\t"

        if function_analyzer.is_local_branch(branch):
            # Branch that stays inside this function
            annotation += StringPalette.ANNOTATION(f"jump loc_{hex(branch.destination_address)}")
        elif branch.symbol:
            annotation += StringPalette.ANNOTATION(branch.symbol)
            if not branch.selector:
                annotation += StringPalette.ANNOTATION("();")
            else:
                annotation += StringPalette.ANNOTATION_ARGS(f"(id, @selector({branch.selector.name})")
                # One argument per ':' in the selector name.
                # x0 is self, x1 is the SEL, real args start at x2
                for arg_index in range(branch.selector.name.count(":")):
                    rendered = ", " + _render_register(f"x{arg_index + 2}", branch)
                    annotation += StringPalette.STRING(rendered)
                annotation += ");"
        else:
            # No symbol for the destination: show the contents of x0 through x3 instead
            annotation += StringPalette.ANNOTATION(f"({hex(instr.address)})(")
            for register_index in range(4):
                register = f"x{register_index}"
                rendered = f"{register}: " + _render_register(register, branch)
                annotation += StringPalette.ANNOTATION_ARGS(rendered)
                annotation += ", "
            annotation += ");"
        return annotation

    # Not a branch: try to annotate string loads
    # This code taken from Ethan's potential passwords check
    if instr.mnemonic not in ("ldr", "adr", "adrp", "add"):
        return annotation
    # Only care about general purpose registers that are being written into
    if ObjcInstruction.instruction_uses_vector_registers(instr):
        return annotation
    _, written_registers = instr.regs_access()
    if len(written_registers) == 0:
        return annotation

    # Get the contents of the first written register (an address)
    register = instr.reg_name(written_registers[0])
    reparsed = ObjcInstruction.parse_instruction(function_analyzer, instr)
    register_contents = function_analyzer.get_register_contents_at_instruction(register, reparsed)
    if register_contents.type != RegisterContentsType.IMMEDIATE:
        return annotation

    # Try reading a string at that address
    binary_str = function_analyzer.binary.read_string_at_address(
        VirtualMemoryPointer(register_contents.value)
    )
    if binary_str:
        annotation += StringPalette.STRING(f'#\t"{binary_str}"')
    return annotation
Beispiel #17
0
    def test_identify_imported_symbols_1(self) -> None:
        """The analyzer reports exactly the expected imported symbols for the first test binary."""
        binary = MachoParser(TestDyldInfoParser.BINARY1_PATH).get_arm64_slice()
        assert binary
        analyzer = MachoAnalyzer.get_analyzer(binary)

        # Expected imported symbol names, keyed by raw integer address
        expected_names_by_address = {
            4295004408: "_OBJC_CLASS_$_NSURLCredential",
            4295004656: "_OBJC_CLASS_$_NSObject",
            4295004488: "_OBJC_METACLASS_$_NSObject",
            4295004568: "_OBJC_METACLASS_$_NSObject",
            4295004608: "_OBJC_METACLASS_$_NSObject",
            4295004616: "_OBJC_METACLASS_$_NSObject",
            4295004688: "_OBJC_METACLASS_$_NSObject",
            4295004464: "__objc_empty_cache",
            4295004504: "__objc_empty_cache",
            4295004544: "__objc_empty_cache",
            4295004584: "__objc_empty_cache",
            4295004624: "__objc_empty_cache",
            4295004664: "__objc_empty_cache",
            4295004704: "__objc_empty_cache",
            4295004744: "__objc_empty_cache",
            4295000064: "dyld_stub_binder",
            4295000216: "___CFConstantStringClassReference",
            4295000248: "___CFConstantStringClassReference",
            4295000280: "___CFConstantStringClassReference",
            4295000312: "___CFConstantStringClassReference",
            4295000344: "___CFConstantStringClassReference",
            4295000376: "___CFConstantStringClassReference",
            4295000408: "___CFConstantStringClassReference",
            4295004400: "_OBJC_CLASS_$_UIFont",
            4295004456: "_OBJC_CLASS_$_UILabel",
            4295004736: "_OBJC_CLASS_$_UIResponder",
            4295004536: "_OBJC_CLASS_$_UIViewController",
            4295004496: "_OBJC_METACLASS_$_UILabel",
            4295004696: "_OBJC_METACLASS_$_UIResponder",
            4295004576: "_OBJC_METACLASS_$_UIViewController",
            4295000080: "_NSClassFromString",
            4295000088: "_NSLog",
            4295000096: "_NSStringFromCGRect",
            4295000104: "_NSStringFromClass",
            4295000112: "_SecTrustEvaluate",
            4295000120: "_UIApplicationMain",
            4295000128: "_dlopen",
            4295000136: "_objc_autoreleasePoolPop",
            4295000144: "_objc_autoreleasePoolPush",
            4295000152: "_objc_getClass",
            4295000160: "_objc_msgSend",
            4295000168: "_objc_msgSendSuper2",
            4295000176: "_objc_release",
            4295000184: "_objc_retain",
            4295000192: "_objc_retainAutoreleasedReturnValue",
            4295000200: "_objc_storeStrong",
            4295000208: "_rand",
        }
        # Re-key the table by VirtualMemoryPointer so it can be compared with the analyzer's mapping
        expected_imports = {}
        for raw_address, name in expected_names_by_address.items():
            expected_imports[VirtualMemoryPointer(raw_address)] = name

        assert analyzer.imported_symbols_to_symbol_names == expected_imports

        # Every imported class pointer must also resolve back to its class symbol name
        for imported_pointer, symbol_name in expected_imports.items():
            if "_OBJC_CLASS_$_" in symbol_name:
                assert analyzer.class_name_for_class_pointer(imported_pointer) == symbol_name