def get_dex_method_at_address(address=None, program=None, header=None):
    """ Resolve the dex method referenced at an address.

    Two kinds of location are understood:
      * an instruction whose mnemonic contains "invoke" - the method index is
        read from the first object of operand 0
      * an address lying inside the header's method id table - the index is
        the number of the table entry containing that address

    address: location to inspect (normalised via ghidra_utils.get_address)
    program: program to look in; defaults to the current program
    header: dex_header for the program; built on demand when not supplied

    Returns the result of header.get_method() for the resolved index.
    Raises Exception when the address is neither an invoke instruction nor
    inside the method id table.
    """
    if program is None:
        program = getState().getCurrentProgram()

    address = ghidra_utils.get_address(address=address, program=program)
    insn = ghidra_utils.get_instruction_at_address(address=address,
                                                   program=program)

    if header is None:
        header = dex_header(program)  # TODO cache this?

    if insn is not None and "invoke" in insn.getMnemonicString():
        # this is the calling line - get the dex method ref at the start
        # note: getOperandReferences(0) gives reference to target fn
        idx = insn.getOpObjects(0)[0].getValue()
    else:
        offset = address.getOffset()
        table_start = header.methods.getOffset()
        table_end = table_start + (header.num_methods *
                                   SIZE_OF_METHOD_ELEMENTS)
        if table_start <= offset < table_end:
            # lies within the dex method range, this is the dex method line.
            # Floor division keeps the index an integer on every Python
            # version ("/" would produce a float under Python 3).
            idx = (offset - table_start) // SIZE_OF_METHOD_ELEMENTS
        else:
            raise Exception(
                "Didn't find invoke or dex method ref at {} in {}. Perhaps you want get_dex_method_for_function()"
                .format(address, program))

    return header.get_method(idx)
def __init__(self, program=None):
    """ Build the header view from the dex header found at address 0.

    program: program holding the dex file; defaults to currentProgram.

    The structure at address 0 is turned into a dict with
    ghidra_utils.dictify. Element counts are copied straight from it, and
    each table location is stored as the header address plus the matching
    offset field.
    """
    if program is None:
        program = currentProgram
    self.program = program

    base = ghidra_utils.get_address(0, program=self.program)
    fields = ghidra_utils.dictify(base, program=self.program)

    # both names refer to the same header address
    self.ea = base
    self.address = base

    def located(key):
        # table offsets in the header are relative to the header address
        return base.add(fields[key])

    self.num_strings = fields["stringIdsSize"]
    self.str_ids = located("stringIdsOffset")

    self.types = located("typeIdsOffset")
    self.proto = located("protoIdsOffset")

    self.num_fields = fields["fieldIdsSize"]
    self.fields = located("fieldIdsOffset")

    self.num_methods = fields["methodIdsSize"]
    self.methods = located("methodIdsOffset")

    self.num_classdefs = fields["classDefsIdsSize"]
    self.classdefs = located("classDefsIdsOffset")

    self.data = located("dataOffset")

    # starts out empty; nothing in this constructor populates it
    self.string_cache = None
def __init__(self, dex_hdr, address):
    """ Read the class data item at address into field and method lists.

    dex_hdr: header object for the dex file; its program is used for the
             listing lookup and is handed on to every decoded entry
    address: location of the class data item

    The first data item supplies the four entry counts (uleb128 encoded).
    The data items that follow are consumed in order: static fields,
    instance fields, direct methods, then virtual methods. Each entry is
    decoded relative to the index of the previous entry in its own list,
    starting from 0.
    """
    self.dex_hdr = dex_hdr
    self.address = address
    self.ea = self.address

    self.static_fields = []
    self.instance_fields = []
    self.direct_methods = []
    self.virtual_methods = []

    data_iterator = dex_hdr.program.getListing().getData(
        ghidra_utils.get_address(self.address), True)

    # note: this only grabs the numbers for each list of entries (because
    # the actual entries are treated as separate data items by ghidra)
    classdata_dict = ghidra_utils.dictify(data_iterator.next(),
                                          program=dex_hdr.program)

    def entry_count(key):
        return decode_uleb128_bytes_to_int(classdata_dict[key])

    def read_fields(key, destination):
        # pull the next run of encoded fields off the shared data iterator
        last_idx = 0
        for _ in range(0, entry_count(key)):
            entry = encoded_dex_field(dex_hdr,
                                      data_iterator.next().getAddress(),
                                      last_idx)
            last_idx = entry.field_idx
            destination.append(entry.field)

    def read_methods(key, destination):
        # pull the next run of encoded methods off the shared data iterator
        last_idx = 0
        for _ in range(0, entry_count(key)):
            entry = encoded_dex_method(dex_hdr,
                                       data_iterator.next().getAddress(),
                                       last_idx)
            last_idx = entry.method_idx
            destination.append(entry.method)

    # order matters: the iterator is consumed sequentially
    read_fields("static_fields", self.static_fields)
    read_fields("instance_fields", self.instance_fields)
    read_methods("direct_methods", self.direct_methods)
    read_methods("virtual_methods", self.virtual_methods)
def get_dex_method_for_function(function):
    """ Find the dex method that a function was created from.

    function: function whose comment contains "Method ID Offset: <address>"

    The header for the function's program is looked up in
    get_all_dex_headers() under str(program). The method id address is
    parsed out of the function comment, and its distance from the start of
    the method id table is converted to a table index.

    Returns the result of header.get_method() for that index.
    No validation is done on the comment: if it lacks the marker text the
    split/index expression below fails.
    """
    program = function.program
    header = get_all_dex_headers()[str(program)]

    # boo, can only get the method id offset from the comment
    offset_text = function.getComment().split("Method ID Offset: ")[1].strip()
    method_id_offset = ghidra_utils.get_address(offset_text, program=program)

    # Floor division keeps the index an integer on every Python version
    # ("/" would produce a float under Python 3).
    idx = method_id_offset.subtract(header.methods) // SIZE_OF_METHOD_ELEMENTS

    return header.get_method(idx)
def find_interface_calls(headers=None,
                         clazz=None,
                         method_name=None,
                         proto=None,
                         dex_method=None,
                         args=None,
                         quiet=False):
    """ Find all calls to a given interface method.

    clazz is the interface class (so, not the implementing class - IFoo,
    not FooImpl). All of clazz, method_name and proto are needed; they are
    taken from dex_method when one is supplied.

    headers: passed through to find_dex_methods (retain and pass in manually
             to speed things up)
    proto: full length prototype string
    args: when not None, call sites are filtered with check_invoke_args
    quiet: suppress printing of the report

    Returns a list of call location offsets whose instruction mnemonic
    starts with "invoke_interface".
    Raises Exception if any of clazz, method_name or proto is missing.

    Requires xref_invoke_to_dex_method_id analysis to be run to have
    reference to the method id location.
    """
    if dex_method is not None:
        clazz = dex_method.clazz
        method_name = dex_method.name
        proto = str(dex_method.method_prototype)

    # TODO - would be nice to have a way to go from a concrete class to find the higher interface class?
    if clazz is None or method_name is None or proto is None:
        raise Exception(
            "Need all of clazz, method_name and proto to find interface calls")

    method_instances = find_dex_methods(headers=headers,
                                        clazz=clazz,
                                        method_name=method_name,
                                        proto=proto)
    # Finding no method reference at all is not treated as an error: the
    # loop below simply does nothing and the report says "Not directly called".

    interface_call_list = []
    directly_called = False
    report = []  # joined once at the end instead of repeated string +=

    for mi in method_instances:
        program = mi.dex_hdr.program
        refmgr = program.getReferenceManager()
        references = refmgr.getReferencesTo(
            ghidra_utils.get_address(mi.address))
        for ref in ghidra_utils.iterate(references):
            directly_called = True
            call_address = ref.getFromAddress()

            if args is not None and not check_invoke_args(
                    call_address.getOffset(), program, args):
                # didn't match
                continue

            insn = ghidra_utils.get_instruction_at_address(
                address=call_address.getOffset(), program=program)
            # get_instruction_at_address can return None (see the check in
            # get_dex_method_at_address); a reference that doesn't come from
            # an instruction can't be an invoke, so skip it instead of
            # crashing on None.
            if insn is None:
                continue
            # require invoke-interface
            if not insn.getMnemonicString().startswith("invoke_interface"):
                continue

            interface_call_list.append(call_address.getOffset())
            call_func = ghidra_utils.get_function_at_address(
                call_address, program)
            report.append("\tCalled by {} ( {} : {} )\n".format(
                call_func,
                call_func.getProgram().getName(), call_func.getEntryPoint()))
            report.append("\t\t{}: {}\n".format(call_address, insn))

    if not directly_called:
        report.append("\tNot directly called\n")
    elif len(interface_call_list) < 1:
        report.append("\tNot called with desired args\n")

    if not quiet:
        print("".join(report))

    return interface_call_list
def find_calls(headers=None,
               clazz=None,
               method_name=None,
               proto=None,
               dex_method=None,
               args=None,
               quiet=False):
    """ Locate the call sites of every dex method matching the query.

    A None for clazz, method_name or proto acts as "*". When dex_method is
    supplied, all three are taken from it. proto is the full length
    prototype string. Retain headers and pass them in to speed things up.

    Usages:
        all: find all usages for a particular method
        clazz and method_name only: find all calls for a polymorphic method
        clazz only: find all calls against given class
        method_name and proto only: find all interactions that might be an
            interface

    args: when not None, call sites are filtered with check_invoke_args
    quiet: suppress printing of the report

    Returns a dict keyed by matched method, each value being the list of
    calling location offsets.

    Requires xref_invoke_to_dex_method_id analysis to be run to have
    reference to the method id location.
    """
    if dex_method is not None:
        clazz = dex_method.clazz
        method_name = dex_method.name
        proto = str(dex_method.method_prototype)

    # find the matching method references
    matches = find_dex_methods(headers=headers,
                               clazz=clazz,
                               method_name=method_name,
                               proto=proto,
                               dex_method=dex_method)

    ret_dict = {}
    lines = []

    for mi in matches:
        call_sites = []
        ret_dict[mi] = call_sites
        saw_reference = False

        lines.append("Method {} @ {}: {}\n".format(
            mi, mi.dex_hdr.program.getName(),
            hex(int(mi.address.getOffset()))))

        # only used to warn when the class definition can't be found
        if dextypes.get_classdef(mi.clazz) is None:
            print("Warning: {} couldn't find class def for {}".format(
                mi, mi.clazz))

        refmgr = mi.dex_hdr.program.getReferenceManager()
        references = refmgr.getReferencesTo(
            ghidra_utils.get_address(mi.address))
        for ref in ghidra_utils.iterate(references):
            saw_reference = True
            call_address = ref.getFromAddress()

            args_ok = args is None or check_invoke_args(
                call_address.getOffset(), mi.dex_hdr.program, args)
            if not args_ok:
                # didn't match
                continue

            call_sites.append(call_address.getOffset())

            call_func = ghidra_utils.get_function_at_address(
                call_address, mi.dex_hdr.program)
            lines.append("\tCalled by {} ( {} : {} )\n".format(
                call_func,
                call_func.getProgram().getName(), call_func.getEntryPoint()))

            insn = ghidra_utils.get_instruction_at_address(
                address=call_address.getOffset(), program=mi.dex_hdr.program)
            lines.append("\t\t{}: {}\n".format(call_address, insn))

        if not saw_reference:
            lines.append("\tNot directly called\n")
        elif not call_sites:
            lines.append("\tNot called with desired args\n")

    if not quiet:
        print("".join(lines))

    return ret_dict
def find_callers(dex_method=None,
                 function=None,
                 headers=None,
                 include_interface=True,
                 include_virtual=True,
                 include_super=False):
    """ Find all the possible callers to a method.

    This is a generator: it yields (address, program, insn) tuples, one per
    referencing location, where insn is whatever
    ghidra_utils.get_instruction_at_address returns for that address.

    dex_method: the method to look for callers of
    function: alternative to dex_method; when given, the dex method is
              looked up with dextypes.get_dex_method_for_function
    headers: dex headers to search (defaults to all dex headers)
    include_interface: also report invoke_interface calls made through the
                       classes from find_potential_invoke_interface_classes
    include_virtual: also report invoke_virtual calls made through the
                     classes from find_potential_invoke_virtual_classes
    include_super: also report invoke_super calls made through the classes
                   from find_potential_invoke_super_classes. Off by default,
                   because it needs to walk every classdef to see if it
                   might be a child of this one

    Raises AssertionError if a reference to an interface method is not an
    invoke_interface instruction.
    """
    if headers is None:
        headers = dextypes.get_all_dex_headers()

    if function is not None:
        dex_method = dextypes.get_dex_method_for_function(function)

    clazz = dex_method.clazz
    method_name = dex_method.name
    proto = str(dex_method.method_prototype)

    # first - direct calls: every reference to the matching method ids
    for mi, address, insn in _iter_method_references(headers, clazz,
                                                     method_name, proto):
        yield (address, mi.dex_hdr.program, insn)

    if include_interface:
        for classdef in find_potential_invoke_interface_classes(
                headers=headers,
                clazz=clazz,
                method_name=method_name,
                proto=proto):
            for mi, address, insn in _iter_method_references(
                    headers, classdef.name, method_name, proto):
                # make sure it's an invoke_interface - should always be, but
                # let's check. Raised explicitly (rather than via an assert
                # statement) so the check survives running with -O.
                if not insn.getMnemonicString().startswith(
                        "invoke_interface"):
                    raise AssertionError(
                        "Interface wasn't called with invoke interface? {} in {} for {}"
                        .format(address, mi.dex_hdr.program, mi))
                yield (address, mi.dex_hdr.program, insn)

    if include_virtual:
        for classdef in find_potential_invoke_virtual_classes(
                headers=headers,
                clazz=clazz,
                method_name=method_name,
                proto=proto):
            for mi, address, insn in _iter_method_references(
                    headers, classdef.name, method_name, proto):
                # make sure it's an invoke_virtual
                if insn.getMnemonicString().startswith("invoke_virtual"):
                    yield (address, mi.dex_hdr.program, insn)

    if include_super:
        for classdef in find_potential_invoke_super_classes(
                headers=headers,
                clazz=clazz,
                method_name=method_name,
                proto=proto):
            for mi, address, insn in _iter_method_references(
                    headers, classdef.name, method_name, proto):
                # make sure it's an invoke_super
                if insn.getMnemonicString().startswith("invoke_super"):
                    yield (address, mi.dex_hdr.program, insn)


def _iter_method_references(headers, clazz, method_name, proto):
    """ Yield (method, from_address, insn) for each reference to a matching dex method id. """
    for mi in find_dex_methods(headers=headers,
                               clazz=clazz,
                               method_name=method_name,
                               proto=proto):
        program = mi.dex_hdr.program
        refmgr = program.getReferenceManager()
        for ref in refmgr.getReferencesTo(ghidra_utils.get_address(
                mi.address)):
            address = ref.getFromAddress()
            insn = ghidra_utils.get_instruction_at_address(address=address,
                                                           program=program)
            yield (mi, address, insn)
def xref_invoke_to_dex_method_id(program=None):
    """ Add a reference from every invoke instruction to its dex method id.

    program: program to analyse; defaults to currentProgram.

    For each function, the basic blocks reachable from the entry point are
    walked. Every call-type block destination must come from an instruction
    whose mnemonic contains "invoke"; the method index is read from the
    first object of operand 0 and a READ memory reference with ANALYSIS
    source is added from the instruction to the address of that method
    (as given by the dex header).

    All changes happen inside one program transaction, which is only
    committed if the whole walk finishes without an exception.

    Raises Exception if a call-type destination comes from a non-invoke
    instruction. Cancellation via the monitor also propagates out.
    """
    if program is None:
        program = currentProgram

    monitor.setMessage("Linking invokes to dex method ids in {}".format(
        program.getName()))

    success = False
    transaction_id = program.startTransaction(
        "xref_invoke_to_dex_method_id analysis")
    try:
        dex_header = dextypes.dex_header(program=program)
        refmgr = program.getReferenceManager()
        model = ghidra.program.model.block.BasicBlockModel(program, True)
        funcmgr = program.getFunctionManager()

        monitor.initialize(funcmgr.getFunctionCount())

        for function in ghidra_utils.iterate(funcmgr.getFunctions(True)):
            monitor.checkCanceled()
            monitor.incrementProgress(1)

            entry_block = model.getFirstCodeBlockContaining(
                function.getEntryPoint(), monitor)
            if entry_block is None:
                # no code block at the entry point - nothing to walk, and
                # not a reason to roll back everything done so far
                continue

            # offsets of every block already visited or queued; a set makes
            # the "seen before?" test O(1) instead of scanning two lists
            seen_offsets = set()
            seen_offsets.add(entry_block.getFirstStartAddress().getOffset())
            to_visit = [entry_block]

            while to_visit:
                monitor.checkCanceled()
                block = to_visit.pop()

                for dest in ghidra_utils.iterate(
                        block.getDestinations(monitor)):
                    monitor.checkCanceled()

                    if dest.getFlowType().isCall():
                        # invoke call site
                        insn = ghidra_utils.get_instruction_at_address(
                            address=dest.getReferent(), program=program)
                        if "invoke" not in insn.getMnemonicString():
                            raise Exception(
                                "Unknown call insn: {} @ {}: {}".format(
                                    insn, program, insn.getAddress()))

                        # it's an invocation! find the operand for the method ref
                        method_idx = insn.getOpObjects(0)[0].getValue()
                        method = dex_header.get_method(method_idx)

                        # create the reference
                        # from the insn, to the dex method id, as a data read, with an analysis source, and the op index is always 0 for an invoke
                        refmgr.addMemoryReference(
                            insn.getAddress(),
                            ghidra_utils.get_address(method.address),
                            ghidra.program.model.symbol.RefType.READ,
                            ghidra.program.model.symbol.SourceType.ANALYSIS,
                            0)
                        continue

                    dest_offset = dest.getDestinationAddress().getOffset()
                    if dest_offset in seen_offsets:
                        continue

                    dest_block = dest.getDestinationBlock()
                    if dest_block is None:
                        # destination isn't inside any code block
                        continue

                    # record both the target address and the start of its
                    # block so the block is never queued twice
                    seen_offsets.add(dest_offset)
                    seen_offsets.add(
                        dest_block.getFirstStartAddress().getOffset())
                    to_visit.append(dest_block)

        success = True
    finally:
        # on exception, success will be false, so the transaction will be rolled back
        program.endTransaction(transaction_id, success)