Exemple #1
0
def main():
    """Generate a CSV symbol map covering every image embedded in a dyld_shared_cache.

    Command-line arguments:
        dyld_shared_cache_path: Path to the dyld_shared_cache which should be symbolicated
        output_csv_path: Path the CSV is written to

    Each CSV row holds (address, symbol name, image path) for one exported symbol.
    An image that cannot be extracted or analyzed is logged, together with the traceback of the failure,
    and skipped, so that a single bad image does not abort the whole run.
    """
    logging.basicConfig(level=logging.INFO)

    arg_parser = argparse.ArgumentParser(description="dyld_shared_cache symbol map generator")
    arg_parser.add_argument(
        "dyld_shared_cache_path", type=str, help="Path to the dyld_shared_cache which should be symbolicated"
    )
    arg_parser.add_argument("output_csv_path", type=str, help="Output CSV path")
    args = arg_parser.parse_args()

    dyld_shared_cache = DyldSharedCacheParser(Path(args.dyld_shared_cache_path))
    symbols: List[Tuple[VirtualMemoryPointer, str, Path]] = []

    # Iterate each image in the DSC, extract it, and record its symbols
    image_count = len(dyld_shared_cache.embedded_binary_info)
    for idx, path in enumerate(dyld_shared_cache.embedded_binary_info, start=1):
        # The DSC has more than 1,000 binaries, so try to free up resources after each image
        MachoAnalyzer.clear_cache()

        logging.info(f"({idx}/{image_count}) Symbolicating {path}...")
        try:
            binary = dyld_shared_cache.get_embedded_binary(path)
            analyzer = MachoAnalyzer.get_analyzer(binary)
            for sym, addr in analyzer.exported_symbol_names_to_pointers.items():
                symbols.append((VirtualMemoryPointer(addr), sym, path))
        except Exception:
            # Deliberately best-effort: keep going with the remaining images, but record the traceback
            # so that the reason this image failed is not lost
            logging.exception(f"Failed to symbolicate {path}")

    # newline="" lets the csv module control line endings itself
    with open(args.output_csv_path, "w", newline="") as output_csv:
        csv_writer = csv.writer(output_csv, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(symbols)
def find_selector_implementations(binary):
    """Print the address range and instruction count of every implementation of a selector.

    The selector searched for is URLSession:didReceiveChallenge:completionHandler:.
    One line is printed per implementation found in the provided slice.

    Args:
        binary: The Mach-O slice to search. Its cpu_type is reported before the search starts.
    """
    print(f"Analyzing Mach-O slice built for {CPU_TYPE(binary.cpu_type).name}")
    # Go through get_analyzer() rather than constructing a MachoAnalyzer directly, so that this function
    # shares an analyzer with the other call sites in the project instead of building its own
    analyzer = MachoAnalyzer.get_analyzer(binary)

    desired_selector = "URLSession:didReceiveChallenge:completionHandler:"
    # NOTE(review): assumes fixed-width 4-byte instructions (e.g. arm64) -- confirm for other architectures
    instruction_size = 4
    for imp_function in analyzer.get_imps_for_sel(desired_selector):
        # Floor division keeps the count an exact integer, with no round-trip through float
        instruction_count = (imp_function.end_address - imp_function.start_address) // instruction_size
        print(
            f"Found implementation of @selector({desired_selector}) at [{hex(imp_function.start_address)}"
            f" - {hex(imp_function.end_address)}] ({instruction_count} instructions)"
        )
Exemple #3
0
def print_binary_symbols(binary: MachoBinary, verbose: bool = True) -> None:
    """Print the imported and exported symbols of a binary in an nm-like layout.

    Args:
        binary: The Mach-O slice whose symbols should be listed
        verbose: When set, imported symbols are tagged "(undefined)" along with their source library, and
            exported symbols are tagged with "(segment,section)". Otherwise, imported symbols are tagged "U"
            and exported symbols are tagged with the first letter of their section name.
    """
    binary_path = binary.path.as_posix()
    architecture = binary.cpu_type.name.lower()
    print(f"\n{binary_path} (for architecture {architecture})")

    # The symbol tables are only available through a MachoAnalyzer
    analyzer = MachoAnalyzer.get_analyzer(binary)

    # Imported symbols have no address to print, so pad the address column to keep the output aligned
    address_padding = " " * 11
    import_marker = "(undefined)" if verbose else "U"
    for imported_name in analyzer.imported_symbols:
        if verbose:
            # The source library is only looked up and reported in verbose mode
            origin = f"(from {get_source_library_of_imported_symbol(analyzer, imported_name)})"
        else:
            origin = ""
        print(f"{address_padding} {import_marker} {imported_name} {origin}")

    for address, exported_name in analyzer.exported_symbol_pointers_to_names.items():
        containing_section = binary.section_for_address(address)
        raw_section_name = containing_section.name.decode()

        if not verbose:
            # Short form: the first letter of the section name, skipping its '__' prefix
            location = raw_section_name[2:3].upper()
        else:
            # Long form: (segment,section)
            location = f"({containing_section.cmd.segname.decode()},{raw_section_name})"

        print(f"{address:#011x} {location} {exported_name}")
Exemple #4
0
    def get_function_analyzer_for_method(
            cls, binary: MachoBinary,
            method_info: ObjcMethodInfo) -> "ObjcFunctionAnalyzer":
        """Build an ObjcFunctionAnalyzer for the implementation of an Objective-C method.

        The method's instructions are fetched through the analyzer returned by MachoAnalyzer.get_analyzer(),
        the same way get_function_analyzer() fetches them.

        Args:
            binary: The MachoBinary containing a function at method_info.imp_addr
            method_info: The ObjcMethodInfo describing the IMP to be analyzed

        Returns:
            An ObjcFunctionAnalyzer for the method, carrying method_info

        Raises:
            ValueError: method_info does not have an implementation address
        """
        # TODO(PT): it seems like this & related methods should be moved to MachoAnalyzer
        imp_address = method_info.imp_addr
        if not imp_address:
            raise ValueError(f"Could not get method implementation address for {method_info}")

        # NOTE(review): imported at call time rather than module level -- presumably to avoid a circular import
        from strongarm.macho.macho_analyzer import MachoAnalyzer

        imp_instructions = MachoAnalyzer.get_analyzer(binary).get_function_instructions(imp_address)
        return ObjcFunctionAnalyzer(binary, imp_instructions, method_info=method_info)
Exemple #5
0
    def __init__(self,
                 binary: MachoBinary,
                 instructions: List[CsInsn],
                 method_info: ObjcMethodInfo = None) -> None:
        """Set up analysis state for one function's instructions.

        Args:
            binary: The MachoBinary the instructions were read from
            instructions: The function's instructions, in order. May be empty, in which case the
                function's address range is recorded as [0, 0).
            method_info: The Objective-C method the function implements, if any
        """
        from strongarm.macho import MachoAnalyzer

        try:
            first_instruction, final_instruction = instructions[0], instructions[-1]
            range_start = VirtualMemoryPointer(first_instruction.address)
            # The range is right-exclusive, so it ends one instruction past the final instruction
            range_end = VirtualMemoryPointer(final_instruction.address) + MachoBinary.BYTES_PER_INSTRUCTION
        except IndexError:
            # No instructions at all: this must have been a stub
            range_start, range_end = VirtualMemoryPointer(0), VirtualMemoryPointer(0)
        self.start_address = range_start
        self.end_address = range_end

        self.binary = binary
        self.macho_analyzer = MachoAnalyzer.get_analyzer(binary)
        self.instructions = instructions
        self.method_info = method_info

        # Starts out unset; nothing in this constructor fills it in
        self._call_targets: Optional[List[ObjcBranchInstruction]] = None

        # Basic-block boundaries are computed eagerly, once all of the state above is in place
        self.basic_blocks = self._find_basic_blocks()
Exemple #6
0
def main() -> None:
    """Entry point of the Mach-O analyzer command-line tool.

    Parses the command line, configures logging, lists the slices of the binary at binary_path, picks one
    slice, and then either runs strongarm_script() or drops into the interactive StrongarmShell.

    Command-line arguments:
        binary_path: Path to binary to analyze
        --verbose: Also emit DEBUG-level log records
    """
    # XXX(PT): Change this if you want to run a quick script! Write it in strongarm_script()
    script = False
    # end of config

    arg_parser = argparse.ArgumentParser(description="Mach-O Analyzer")
    arg_parser.add_argument("--verbose",
                            action="store_true",
                            help="Output extra info while analyzing")
    arg_parser.add_argument("binary_path",
                            metavar="binary_path",
                            type=str,
                            help="Path to binary to analyze")
    args = arg_parser.parse_args()

    def configure_logger(verbose: bool) -> None:
        """Route log records to stdout, letting DEBUG records through only when verbose is set."""
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        ch = logging.StreamHandler(sys.stdout)
        # The root logger passes everything on, so the handler's level is what decides what gets printed.
        # It must be lowered for --verbose to have any effect.
        ch.setLevel(logging.DEBUG if verbose else logging.INFO)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        root.addHandler(ch)

    configure_logger(args.verbose)

    print_header(args)

    parser = MachoParser(pathlib.Path(args.binary_path))

    # print slice info
    print("Slices:")
    for macho_slice in parser.slices:
        print(f"\t{macho_slice.cpu_type.name} Mach-O slice")

    binary = pick_macho_slice(parser)
    print(f"Reading {binary.cpu_type.name} slice\n\n")

    analyzer = MachoAnalyzer.get_analyzer(binary)
    shell = StrongarmShell(binary, analyzer)

    if script:
        print("Running provided script...\n\n")
        strongarm_script(binary, analyzer)
    else:
        autorun_cmd = "info metadata segments sections loads"
        print(f"Auto-running '{autorun_cmd}'\n\n")
        shell.run_command(autorun_cmd)

        # this will return False once the shell exits
        while shell.process_command():
            pass
    print("May your arms be beefy and your binaries unencrypted")
Exemple #7
0
def print_analyzer_classes(analyzer: MachoAnalyzer) -> None:
    """Print each Objective-C class reported by the analyzer, sorted by name, with its selector count.

    Categories are displayed as "BaseClass (CategoryName)".

    Args:
        analyzer: The MachoAnalyzer whose objc_classes() should be listed
    """
    print("\nObjective-C Classes:")
    for entry in sorted(analyzer.objc_classes(), key=lambda c: c.name):
        if not isinstance(entry, ObjcCategory):
            display_name = entry.name
        else:
            # A category is shown together with the class it extends
            display_name = f"{entry.base_class} ({entry.name})"
        print(f"\t{display_name}: {len(entry.selectors)} selectors")
    def setup_method(self) -> None:
        """Prepare an ObjcFunctionAnalyzer over the first URLSession delegate implementation in the test binary."""
        # Work on the first slice of the FAT test binary
        self.binary = MachoParser(TestFunctionAnalyzer.FAT_PATH).slices[0]
        self.analyzer = MachoAnalyzer.get_analyzer(self.binary)

        selector_name = "URLSession:didReceiveChallenge:completionHandler:"
        self.implementations = self.analyzer.get_imps_for_sel(selector_name)
        first_implementation = self.implementations[0]
        self.instructions = first_implementation.instructions

        # Sanity check: the implementation must start at the address the tests expect
        self.imp_addr = self.instructions[0].address
        assert self.imp_addr == TestFunctionAnalyzer.URL_SESSION_DELEGATE_IMP_ADDR

        self.function_analyzer = ObjcFunctionAnalyzer(self.binary, self.instructions)
Exemple #9
0
    def get_function_analyzer_for_signature(
            cls, binary: MachoBinary, class_name: str,
            sel_name: str) -> "ObjcFunctionAnalyzer":
        """Build an ObjcFunctionAnalyzer for the method -[class_name sel_name].

        Args:
            binary: The MachoBinary to search for the method
            class_name: Name of the Objective-C class implementing the method
            sel_name: Name of the selector to look up on that class

        Returns:
            The ObjcFunctionAnalyzer of the first selector matching both names

        Raises:
            RuntimeError: No class named class_name has a selector named sel_name
        """
        from strongarm.macho.macho_analyzer import MachoAnalyzer

        for candidate_class in MachoAnalyzer.get_analyzer(binary).objc_classes():
            if candidate_class.name != class_name:
                continue
            for selector in candidate_class.selectors:
                if selector.name != sel_name:
                    continue
                # XXX(PT): where are the method info's normally stored? Can we grab it from there?
                matched_method = ObjcMethodInfo(candidate_class, selector, selector.implementation)
                return ObjcFunctionAnalyzer.get_function_analyzer_for_method(binary, matched_method)
        raise RuntimeError(f"No found function analyzer for -[{class_name} {sel_name}]")
Exemple #10
0
def main():
    """Print each unique string found across all slices of the binary given on the command line.

    Command-line arguments:
        binary_path: Path to binary whose strings should be printed
    """
    cli = argparse.ArgumentParser(description="strings clone")
    cli.add_argument("binary_path",
                     metavar="binary_path",
                     type=str,
                     help="Path to binary whose strings should be printed")
    cli_args = cli.parse_args()

    macho = MachoParser(pathlib.Path(cli_args.binary_path))

    # A set de-duplicates strings which show up in more than one slice
    unique_strings = set()
    for macho_slice in macho.slices:
        # The string table is only available through a MachoAnalyzer
        unique_strings.update(MachoAnalyzer.get_analyzer(macho_slice).strings())

    for found_string in unique_strings:
        print(found_string)
Exemple #11
0
    def get_function_analyzer(
            cls, binary: MachoBinary,
            start_address: VirtualMemoryPointer) -> "ObjcFunctionAnalyzer":
        """Build an ObjcFunctionAnalyzer for the function beginning at start_address.

        The function's instructions are read through MachoAnalyzer.get_analyzer(binary), which is meant to
        hand back a shared analyzer for the binary rather than re-analyzing it on every call. Prefer this
        method over constructing an ObjcFunctionAnalyzer yourself.

        Args:
            binary: The MachoBinary containing a function at start_address
            start_address: The entry point address for the function to be analyzed

        Returns:
            An ObjcFunctionAnalyzer over the instructions of that function
        """
        from strongarm.macho.macho_analyzer import MachoAnalyzer

        shared_analyzer = MachoAnalyzer.get_analyzer(binary)
        return ObjcFunctionAnalyzer(binary, shared_analyzer.get_function_instructions(start_address))
 def test_three_op_add(self) -> None:
     """x0 built up by an adrp/add pair is reported as an immediate at the bl which follows."""
     # Code under test:
     # 0x000000010000665c         adrp       x0, #0x102a41000
     # 0x0000000100006660         add        x0, x0, #0x458
     # 0x0000000100006664         bl         0x101f8600c
     function_entry = VirtualMemoryPointer(0x10000665C)
     branch_address = VirtualMemoryPointer(0x100006664)

     binary_path = pathlib.Path(__file__).parent / "bin" / "ThreeOpAddInstruction"
     binary = MachoParser(binary_path).get_arm64_slice()
     assert binary

     function_instructions = MachoAnalyzer.get_analyzer(binary).get_function_instructions(function_entry)
     function_analyzer = ObjcFunctionAnalyzer(binary, function_instructions)

     raw_branch = function_analyzer.get_instruction_at_address(branch_address)
     branch = ObjcInstruction.parse_instruction(function_analyzer, raw_branch)

     # adrp page (0x102a41000) + add offset (0x458)
     x0_contents = function_analyzer.get_register_contents_at_instruction("x0", branch)
     assert x0_contents.type == RegisterContentsType.IMMEDIATE
     assert x0_contents.value == 0x102A41458
Exemple #13
0
def main():
    """Print an @interface declaration for each Objective-C class and category in a binary.

    Command-line arguments:
        binary_path: Path to binary to print Objective-C class information
    """
    cli = argparse.ArgumentParser(description="classdump clone")
    cli.add_argument("binary_path",
                     metavar="binary_path",
                     type=str,
                     help="Path to binary to print Objective-C class information")
    cli_args = cli.parse_args()

    parser = MachoParser(pathlib.Path(cli_args.binary_path))

    # Use an arm64 slice when the binary has one, and fall back to the first slice otherwise
    arm64_slices = [candidate for candidate in parser.slices if candidate.cpu_type == CPU_TYPE.ARM64]
    binary = arm64_slices[0] if arm64_slices else parser.slices[0]
    analyzer = MachoAnalyzer.get_analyzer(binary)

    for entity in analyzer.objc_classes() + analyzer.objc_categories():
        # Opening line: every entity is declared as an NSObject subclass, followed by its protocols, if any
        declaration = f"@interface {entity.name} : NSObject"
        if entity.protocols:
            declaration += " <" + ", ".join(protocol.name for protocol in entity.protocols) + ">"
        print(declaration)

        # Ivar list
        print("{")
        for ivar in entity.ivars:
            # Objective-C class names are @"enclosed" in the ivar's type; drop that decoration
            ivar_type = ivar.class_name.strip('@"')
            print(f"\t{ivar_type}* {ivar.name};")
        print("}")

        # Method list
        for selector in entity.selectors:
            # TODO(PT): Guess argument types by using the selector's type encoding
            print(_prototype_from_selector(selector.name))

        print("@end\n")
Exemple #14
0
def binary_containing_code(
    code_inside_objc_class: str,
    is_assembly: bool,
    code_outside_objc_class: str = ""
) -> Generator[Tuple[MachoBinary, MachoAnalyzer], None, None]:
    """Yield the arm64 slice, and its analyzer, of a binary compiled from the provided source code.

    If is_assembly is set, the source code is treated as AArch64 assembly. Otherwise, as Objective-C source.

    The provided source code is embedded within a class definition.
    If you need to embed code outside a class definition, pass it as code_outside_objc_class.

    The compiled binary is cached in the bin/auto_compiled_binaries directory next to this file, under a name
    derived from the MD5 of the source code, so the code is only compiled when no cached binary exists yet.
    This facilitates running the unit tests using this mechanism in Pipelines.

    Args:
        code_inside_objc_class: Source code to compile within a class definition
        is_assembly: Whether the source code is AArch64 assembly rather than Objective-C
        code_outside_objc_class: Source code to compile outside of the class definition

    Yields:
        A single (binary, analyzer) pair for the arm64 slice of the compiled binary
    """
    # TODO(PT): When you modify source code of a unit test, it means there is a 'dangling' unused binary in the tree.
    # Add a cleanup task to identify these unused binaries and delete them.

    # Do we need to compile this code, or is there a cached version available?
    # MD5 only serves as a cache key here, not as a security measure.
    # NOTE(review): is_assembly is not part of the key, so identical text compiled in both modes would share
    # one cache entry. Changing the key would orphan the binaries which are already cached, so it is left as-is.
    code_hash = hashlib.md5(
        f"{code_inside_objc_class}{code_outside_objc_class}".encode()).hexdigest()
    compiled_artifacts_dir = pathlib.Path(__file__).parent / "bin" / "auto_compiled_binaries"
    compiled_code_bin_path = compiled_artifacts_dir / str(code_hash)
    if not compiled_code_bin_path.exists():
        # The cache directory may not exist yet (e.g. in a fresh checkout); copying into it would fail
        compiled_artifacts_dir.mkdir(parents=True, exist_ok=True)
        # Compile and cache this source code
        with _compile_code(code_inside_objc_class,
                           is_assembly,
                           code_outside_objc_class=code_outside_objc_class) as temp_compiled_bin:
            shutil.copy(temp_compiled_bin, compiled_code_bin_path)

    binary = MachoParser(compiled_code_bin_path).get_arm64_slice()
    assert binary is not None
    analyzer = MachoAnalyzer.get_analyzer(binary)
    yield binary, analyzer
    def test_analyze_embedded_binary(
            self, dyld_shared_cache: DyldSharedCacheParser) -> None:
        """libSystem.B.dylib can be extracted from the shared cache and analyzed like a standalone binary."""
        expected_functions = {
            0x18002FA7C, 0x18002FB34, 0x18002FB58, 0x18002FB7C, 0x18002FBBC
        }
        expected_exports = {
            "<redacted>": 0x18002FBBC,
            "___crashreporter_info__": 0x1B7C574B8,
            "_libSystem_atfork_child": 0x18002FB7C,
            "_libSystem_atfork_parent": 0x18002FB58,
            "_libSystem_atfork_prepare": 0x18002FB34,
            "_mach_init_routine": 0x1B7C574B0,
        }

        # Given I parse an embedded binary
        libsystem = dyld_shared_cache.get_embedded_binary(Path("/usr/lib/libSystem.B.dylib"))

        # Then the binary appears to be parsed correctly
        assert libsystem.get_virtual_base() == 0x18002E000
        assert libsystem.get_functions() == expected_functions

        # And when the binary is analyzed further
        analyzer = MachoAnalyzer.get_analyzer(libsystem)

        # Then the analyzed binary reports the correct information
        assert len(analyzer.imported_symbols) == 47
        assert analyzer.exported_symbol_names_to_pointers == expected_exports
from strongarm.macho import MachoAnalyzer, MachoParser
from strongarm.objc import CodeSearch, CodeSearchTermCallDestination

# Example: report every NSLog call reachable from an implementation of a given selector
binary = MachoParser("./tests/bin/StrongarmTarget").get_arm64_slice()
analyzer = MachoAnalyzer(binary)

# No class is specified: this is an NSURLSessionDelegate method, and any class might implement it
desired_selector = "URLSession:didReceiveChallenge:completionHandler:"
implementations = analyzer.get_imps_for_sel(desired_selector)
for imp_function in implementations:
    # Match any call whose destination is the _NSLog symbol
    nslog_call = CodeSearchTermCallDestination(binary, invokes_symbol="_NSLog")
    log_search = CodeSearch([nslog_call])
    for search_result in imp_function.search_call_graph(log_search):
        function_containing_log_call = search_result.found_function
        source_function_address = hex(function_containing_log_call.start_address)
        call_site_address = hex(search_result.found_instruction.address)
        print(
            "Found a reachable code branch which calls NSLog originating from source function"
            f" {source_function_address} at {call_site_address}"
        )
# TODO(PT): This file is out of date
from strongarm.macho import MachoAnalyzer, MachoParser
from strongarm.objc import CodeSearch, RegisterContentsType

binary = MachoParser("./tests/bin/StrongarmControlFlowTarget").get_arm64_slice()
analyzer = MachoAnalyzer(binary)

log_search = CodeSearch(
    [
        CodeSearchTermCallDestination(binary, invokes_symbol="_printf"),
        CodeSearchTermCallDestination(binary, invokes_symbol="_NSLog"),
    ]
)
search_results = analyzer.queue_code_search(log_search)
for search_result in search_results:
    function_containing_log_call = search_result.found_function
    method_info = function_containing_log_call.method_info
    log_call_instruction = search_result.found_instruction
    print(
        f"Found call to {log_call_instruction.symbol} in -[{method_info.objc_class.name} {method_info.objc_sel.name}]"
        f" at {hex(method_info.imp_addr)}:"
    )

    string_arg = function_containing_log_call.get_register_contents_at_instruction(
        register="r0", instruction=log_call_instruction
    )
    # the string passed to the log call may have been passed as an argument to this function
    if string_arg.type == RegisterContentsType.FUNCTION_ARG:
        print(
            f"\t{log_call_instruction.symbol}() called with a string passed to function"
            f" {hex(function_containing_log_call.start_address)} in argument #{string_arg.value}"
Exemple #18
0
def print_analyzer_methods(analyzer: MachoAnalyzer) -> None:
    """Print every Objective-C method reported by the analyzer, using print_selector() for each one.

    Args:
        analyzer: The MachoAnalyzer whose get_objc_methods() should be listed
    """
    print("\nObjective-C Methods:")
    for objc_method in analyzer.get_objc_methods():
        print_selector(objc_method.objc_class, objc_method.objc_sel)
Exemple #19
0
def print_analyzer_protocols(analyzer: MachoAnalyzer) -> None:
    """Print each protocol conformed to within the binary, sorted by name, with its selector count.

    Args:
        analyzer: The MachoAnalyzer whose get_conformed_protocols() should be listed
    """
    print("\nProtocols conformed to within the binary:")
    conformed = list(analyzer.get_conformed_protocols())
    conformed.sort(key=lambda proto: proto.name)
    for proto in conformed:
        selector_count = len(proto.selectors)
        print(f"\t{proto.name}: {selector_count} selectors")