Exemple #1
0
def get_dex_method_at_address(address=None, program=None, header=None):
    """Resolve the dex method referred to at an address.

    The address may be either an invoke instruction (the method index is read
    from the instruction's first operand) or a location inside the dex method
    id table described by ``header``.

    Args:
        address: Location to inspect; normalised via
            ghidra_utils.get_address().
        program: Program to look in. Defaults to the current program.
        header: dex_header for the program. Built on demand when omitted.

    Returns:
        The result of header.get_method() for the resolved method index.

    Raises:
        Exception: the address is neither an invoke instruction nor inside
            the method id table.
    """
    if program is None:
        program = getState().getCurrentProgram()

    address = ghidra_utils.get_address(address=address, program=program)
    insn = ghidra_utils.get_instruction_at_address(address=address,
                                                   program=program)

    if header is None:
        header = dex_header(program)  # TODO cache this?

    if insn is not None and "invoke" in insn.getMnemonicString():
        # this is the calling line - get the dex method ref at the start
        # note: getOperandReferences(0) gives reference to target fn
        return header.get_method(insn.getOpObjects(0)[0].getValue())

    offset = address.getOffset()
    table_start = header.methods.getOffset()
    table_end = table_start + header.num_methods * SIZE_OF_METHOD_ELEMENTS
    if table_start <= offset < table_end:
        # lies within the dex method range, this is the dex method line.
        # Floor division keeps the index an integer on Python 3 runtimes as
        # well as on Jython 2.7 (where "/" already floors for integers).
        return header.get_method(
            (offset - table_start) // SIZE_OF_METHOD_ELEMENTS)

    raise Exception(
        "Didn't find invoke or dex method ref at {} in {}. Perhaps you want get_dex_method_for_function()"
        .format(address, program))
Exemple #2
0
    def __init__(self, program=None):
        """Build the header object from the dex header data at address 0.

        Args:
            program: Program containing the dex component. Falls back to
                ``currentProgram`` when omitted.

        The header data is turned into a dict by ghidra_utils.dictify(); the
        ``*Size`` entries are stored as counts and the ``*Offset`` entries
        are stored as addresses relative to the header address.
        """
        self.program = currentProgram if program is None else program

        base = ghidra_utils.get_address(0, program=self.program)
        raw = ghidra_utils.dictify(base, program=self.program)

        # Both names refer to the same header address.
        self.ea = base
        self.address = base

        def table_at(offset_key):
            # Header offsets are relative to the header address itself.
            return base.add(raw[offset_key])

        self.num_strings = raw["stringIdsSize"]
        self.str_ids = table_at("stringIdsOffset")
        self.types = table_at("typeIdsOffset")
        self.proto = table_at("protoIdsOffset")
        self.num_fields = raw["fieldIdsSize"]
        self.fields = table_at("fieldIdsOffset")
        self.num_methods = raw["methodIdsSize"]
        self.methods = table_at("methodIdsOffset")
        self.num_classdefs = raw["classDefsIdsSize"]
        self.classdefs = table_at("classDefsIdsOffset")
        self.data = table_at("dataOffset")

        # Nothing cached yet.
        self.string_cache = None
Exemple #3
0
    def __init__(self, dex_hdr, address):
        """Read the class data item at ``address`` into field/method lists.

        Args:
            dex_hdr: Header object of the dex file; its ``program`` is used
                to reach the listing.
            address: Location of the class data item.

        Populates ``static_fields``, ``instance_fields``, ``direct_methods``
        and ``virtual_methods``, in that order, from the data items that
        follow the class data item in the listing.
        """
        self.instance_fields = []
        self.static_fields = []
        self.virtual_methods = []
        self.direct_methods = []

        self.address = address
        self.ea = self.address
        self.dex_hdr = dex_hdr

        # NOTE(review): get_address() is called without a program here -
        # confirm it resolves against dex_hdr.program and not some default.
        data_iterator = dex_hdr.program.getListing().getData(
            ghidra_utils.get_address(self.address), True)

        # note: this only grabs the numbers for each list of entries (because
        # the actual entries are treated as separate data items by ghidra)
        classdata_dict = ghidra_utils.dictify(data_iterator.next(),
                                              program=dex_hdr.program)

        def read_entries(count_key, entry_type, idx_attr, item_attr,
                         destination):
            # Consume the next N data items, where N is the uleb128 count
            # stored under count_key. Each entry is built with the previous
            # entry's index (starting from 0) - presumably because indices
            # are stored as differences; confirm against the entry classes.
            prev_idx = 0
            count = decode_uleb128_bytes_to_int(classdata_dict[count_key])
            for _ in range(count):
                entry = entry_type(dex_hdr,
                                   data_iterator.next().getAddress(),
                                   prev_idx)
                prev_idx = getattr(entry, idx_attr)
                destination.append(getattr(entry, item_attr))

        # Order matters: the entries are consumed sequentially from the same
        # iterator.
        read_entries("static_fields", encoded_dex_field, "field_idx",
                     "field", self.static_fields)
        read_entries("instance_fields", encoded_dex_field, "field_idx",
                     "field", self.instance_fields)
        read_entries("direct_methods", encoded_dex_method, "method_idx",
                     "method", self.direct_methods)
        read_entries("virtual_methods", encoded_dex_method, "method_idx",
                     "method", self.virtual_methods)
Exemple #4
0
def get_dex_method_for_function(function):
    """Return the dex method that corresponds to a function.

    The method id location is parsed out of the function's comment (the text
    following "Method ID Offset: ") and converted to an index into the
    method id table of the dex header registered for the function's program.

    Args:
        function: Function whose comment carries the method id offset.

    Returns:
        The result of header.get_method() for the computed index.
    """
    program = function.program

    header = get_all_dex_headers()[str(program)]

    # boo, can only get the method id offset from the comment
    offset_text = function.getComment().split("Method ID Offset: ")[1].strip()
    method_id_offset = ghidra_utils.get_address(offset_text, program=program)

    # Floor division keeps the index an integer on Python 3 runtimes as well
    # as on Jython 2.7 (where "/" already floors for integers).
    idx = method_id_offset.subtract(header.methods) // SIZE_OF_METHOD_ELEMENTS

    return header.get_method(idx)
Exemple #5
0
def find_interface_calls(headers=None,
                         clazz=None,
                         method_name=None,
                         proto=None,
                         dex_method=None,
                         args=None,
                         quiet=False):
    """ Find all calls to a given interface.
        clazz is interface class (so, not the implementing class - IFoo, not FooImpl). Need all of clazz, method_name and proto
        (or a dex_method, from which all three are taken).

        Speed up by retaining headers and passing in manually

        Proto is full length prototype string

        If args is given, only call sites accepted by check_invoke_args() are kept.

        Unless quiet is set, a report of the call sites is printed.

        Returns list of call locations (address offsets of the invoke_interface instructions).

        Raises Exception if any of clazz, method_name or proto is missing.

        Requires xref_invoke_to_dex_method_id analysis to be run to have reference to the method id location.
    """
    if dex_method is not None:
        clazz = dex_method.clazz
        method_name = dex_method.name
        proto = str(dex_method.method_prototype)

    # TODO - would be nice to have a way to go from a concrete class to find the higher interface class?
    if clazz is None or method_name is None or proto is None:
        raise Exception(
            "Need all of clazz, method_name and proto to find interface calls")

    # Finding no matching method reference is not an error: the result is
    # simply an empty list.
    method_instances = find_dex_methods(headers=headers,
                                        clazz=clazz,
                                        method_name=method_name,
                                        proto=proto)

    # require invoke-interface
    interface_call_list = []
    directly_called = False
    report = []

    for mi in method_instances:
        program = mi.dex_hdr.program
        refmgr = program.getReferenceManager()
        for ref in ghidra_utils.iterate(
                refmgr.getReferencesTo(ghidra_utils.get_address(mi.address))):
            directly_called = True
            call_address = ref.getFromAddress()

            if args is not None and not check_invoke_args(
                    call_address.getOffset(), program, args):
                # didn't match
                continue

            insn = ghidra_utils.get_instruction_at_address(
                address=call_address.getOffset(), program=program)

            # A reference that does not come from an instruction cannot be an
            # invoke-interface call site, so skip it instead of failing on
            # None.
            if insn is None:
                continue
            if not insn.getMnemonicString().startswith("invoke_interface"):
                continue

            interface_call_list.append(call_address.getOffset())
            call_func = ghidra_utils.get_function_at_address(
                call_address, program)
            report.append("\tCalled by {} ( {} : {} )\n".format(
                call_func,
                call_func.getProgram().getName(),
                call_func.getEntryPoint()))
            report.append("\t\t{}: {}\n".format(call_address, insn))

    if not directly_called:
        report.append("\tNot directly called\n")
    elif len(interface_call_list) < 1:
        report.append("\tNot called with desired args\n")

    if not quiet:
        print("".join(report))

    return interface_call_list
Exemple #6
0
def find_calls(headers=None,
               clazz=None,
               method_name=None,
               proto=None,
               dex_method=None,
               args=None,
               quiet=False):
    """ Locate every call site of the matching dex method(s).

        A None for clazz, method_name or proto acts as a wildcard ("*").
        Proto is the full length prototype string. When dex_method is given,
        clazz, method_name and proto are taken from it.

        Passing in previously retrieved headers speeds things up.

        Typical usages:
            everything given: all usages of one particular method
            clazz and method_name only: all calls of a polymorphic method
            clazz only: all calls against the given class
            method_name and proto only: all interactions that might be an interface

        If args is given, only call sites accepted by check_invoke_args() are kept.

        Unless quiet is set, a report of the methods and their callers is printed.

        Returns a dict mapping each matching method to the list of address
        offsets it is called from.

        Requires xref_invoke_to_dex_method_id analysis to be run to have reference to the method id location.
    """

    if dex_method is not None:
        clazz = dex_method.clazz
        method_name = dex_method.name
        proto = str(dex_method.method_prototype)

    # find the matching method references
    matches = find_dex_methods(headers=headers,
                               clazz=clazz,
                               method_name=method_name,
                               proto=proto,
                               dex_method=dex_method)

    ret_dict = {}
    report = []

    for mi in matches:
        call_sites = []
        ret_dict[mi] = call_sites
        saw_reference = False
        program = mi.dex_hdr.program

        report.append("Method {} @ {}: {}\n".format(
            mi, program.getName(), hex(int(mi.address.getOffset()))))

        # The class def is only looked up to warn when it is missing.
        if dextypes.get_classdef(mi.clazz) is None:
            print("Warning: {} couldn't find class def for {}".format(
                mi, mi.clazz))

        references = program.getReferenceManager().getReferencesTo(
            ghidra_utils.get_address(mi.address))
        for ref in ghidra_utils.iterate(references):
            saw_reference = True
            source = ref.getFromAddress()
            source_offset = source.getOffset()

            # didn't match the requested arguments
            if args is not None and not check_invoke_args(
                    source_offset, program, args):
                continue

            call_sites.append(source_offset)
            caller = ghidra_utils.get_function_at_address(source, program)

            report.append("\tCalled by {} ( {} : {} )\n".format(
                caller, caller.getProgram().getName(),
                caller.getEntryPoint()))
            insn = ghidra_utils.get_instruction_at_address(
                address=source.getOffset(), program=program)
            report.append("\t\t{}: {}\n".format(source, insn))

        if not saw_reference:
            report.append("\tNot directly called\n")
        elif not call_sites:
            report.append("\tNot called with desired args\n")

    if not quiet:
        print("".join(report))

    return ret_dict
Exemple #7
0
def _iter_method_references(method_instances):
    """ Yield (method, address, program, insn) for every reference to each of the given dex methods.

        address is where the reference comes from, program is the method's program, and insn is whatever
        ghidra_utils.get_instruction_at_address() returns for that address.
    """
    for mi in method_instances:
        program = mi.dex_hdr.program
        refmgr = program.getReferenceManager()
        for ref in refmgr.getReferencesTo(ghidra_utils.get_address(
                mi.address)):
            address = ref.getFromAddress()
            insn = ghidra_utils.get_instruction_at_address(address=address,
                                                           program=program)
            yield mi, address, program, insn


def find_callers(dex_method=None,
                 function=None,
                 headers=None,
                 include_interface=True,
                 include_virtual=True,
                 include_super=False):
    """ Find all the possible callers to a method.

        This is a generator: it yields tuples of (address, program, insn) for each caller, where insn is the
        instruction found at the calling address.

        The method is given either as dex_method or as function (in which case the dex method is looked up
        for it). Direct references are always reported. Depending on the include_* flags, references via
        potential interface classes, potential virtual classes (invoke_virtual only) and potential super
        classes (invoke_super only) are reported as well.

        include super is off by default, because it needs to walk every classdef to see if it might be a child of this one
    """
    if headers is None:
        headers = dextypes.get_all_dex_headers()

    if function is not None:
        dex_method = dextypes.get_dex_method_for_function(function)

    clazz = dex_method.clazz
    method_name = dex_method.name
    proto = str(dex_method.method_prototype)

    def references_for(class_name):
        # Same method name and prototype, looked up on the given class.
        return _iter_method_references(
            find_dex_methods(headers=headers,
                             clazz=class_name,
                             method_name=method_name,
                             proto=proto))

    # first - direct calls
    # find the matching method references
    for _, address, program, insn in references_for(clazz):
        yield (address, program, insn)

    if include_interface:
        for classdef in find_potential_invoke_interface_classes(
                headers=headers, clazz=clazz, method_name=method_name,
                proto=proto):
            for mi, address, program, insn in references_for(classdef.name):
                # make sure it's an invoke_interface - should always be, but let's check
                assert insn.getMnemonicString().startswith(
                    "invoke_interface"
                ), "Interface wasn't called with invoke interface? {} in {} for {}".format(
                    address, program, mi)
                yield (address, program, insn)

    if include_virtual:
        for classdef in find_potential_invoke_virtual_classes(
                headers=headers, clazz=clazz, method_name=method_name,
                proto=proto):
            for _, address, program, insn in references_for(classdef.name):
                # make sure it's an invoke_virtual
                if insn.getMnemonicString().startswith("invoke_virtual"):
                    yield (address, program, insn)

    if include_super:
        for classdef in find_potential_invoke_super_classes(
                headers=headers, clazz=clazz, method_name=method_name,
                proto=proto):
            for _, address, program, insn in references_for(classdef.name):
                # make sure it's an invoke_super
                if insn.getMnemonicString().startswith("invoke_super"):
                    yield (address, program, insn)
Exemple #8
0
def xref_invoke_to_dex_method_id(program=None):
    """ Add a reference from every invoke instruction to the dex method id it names.

        For each function in the program, the basic blocks reachable from the entry point are walked. Every
        call flow out of a block must come from an invoke instruction; its first operand is the method index,
        and a READ memory reference (analysis source, operand index 0) is added from the instruction to the
        address of that dex method id.

        program defaults to currentProgram.

        Everything happens inside one transaction, which is only committed if the whole walk succeeds.

        Raises Exception if a call flow comes from an instruction that is not an invoke.
    """
    if program is None:
        program = currentProgram

    monitor.setMessage("Linking invokes to dex method ids in {}".format(
        program.getName()))

    success = False
    transaction_id = program.startTransaction(
        "xref_invoke_to_dex_method_id analysis")

    try:
        dex_header = dextypes.dex_header(program=program)
        refmgr = program.getReferenceManager()
        model = ghidra.program.model.block.BasicBlockModel(program, True)

        funcmgr = program.getFunctionManager()
        monitor.initialize(funcmgr.getFunctionCount())
        for function in ghidra_utils.iterate(funcmgr.getFunctions(True)):
            monitor.checkCanceled()
            monitor.incrementProgress(1)

            # Start offsets of the blocks already processed; a set because it
            # is only ever used for membership tests.
            visited_offsets = set()
            to_visit = [
                model.getFirstCodeBlockContaining(function.getEntryPoint(),
                                                  monitor)
            ]

            while to_visit:
                monitor.checkCanceled()
                block = to_visit.pop()
                visited_offsets.add(block.getFirstStartAddress().getOffset())

                for dest in ghidra_utils.iterate(
                        block.getDestinations(monitor)):
                    monitor.checkCanceled()

                    if not dest.getFlowType().isCall():
                        # Ordinary flow: queue the destination block unless
                        # it was already processed or is already queued.
                        dest_offset = dest.getDestinationAddress().getOffset()
                        if dest_offset in visited_offsets:
                            continue
                        if any(dest_offset ==
                               tv.getFirstStartAddress().getOffset()
                               for tv in to_visit):
                            continue
                        to_visit.append(dest.getDestinationBlock())
                        continue

                    # invoke call site
                    insn = ghidra_utils.get_instruction_at_address(
                        address=dest.getReferent(), program=program)
                    if "invoke" not in insn.getMnemonicString():
                        raise Exception(
                            "Unknown call insn: {} @ {}: {}".format(
                                insn, program, insn.getAddress()))
                    # it's an invocation! find the operand for the method ref
                    method_idx = insn.getOpObjects(0)[0].getValue()
                    method = dex_header.get_method(method_idx)

                    # create the reference
                    # from the insn, to the dex method id, as a data read, with an analysis source, and the op index is always 0 for an invoke
                    refmgr.addMemoryReference(
                        insn.getAddress(),
                        ghidra_utils.get_address(method.address),
                        ghidra.program.model.symbol.RefType.READ,
                        ghidra.program.model.symbol.SourceType.ANALYSIS, 0)
        success = True
    finally:
        # on exception, success will be false, so the transaction will be rolled back
        program.endTransaction(transaction_id, success)