def print_class(classname):
    """ Print all of a class's fields and methods.

    Looks `classname` up in `currentProgram`, walks the child symbols of the
    first match and prints a Java-like listing: static fields first, then
    instance fields, then one line per child symbol that is a function.
    """
    matches = ghidra.app.util.NamespaceUtils.getSymbols(classname, currentProgram)
    # Only the first matching symbol is inspected.
    children = list(currentProgram.getSymbolTable().getChildren(matches[0]))
    method_lines = []
    instance_lines = []
    static_lines = []
    strlist = build_string_list()
    typelist = build_type_list(strlist)

    for child in children:
        function = currentProgram.getFunctionManager().getFunction(child.getID())
        if function is not None:
            entry = "\t0x{}: {}".format(
                child.getAddress(), function.getPrototypeString(True, False))
            method_lines.append(entry.replace("* ", ""))
            continue
        if child.getName() != "__classdef__":
            continue

        classdef = dictify(getDataAt(child.getAddress()))
        classdata_location = currentProgram.getAddressFactory().getAddress(
            str(classdef["classDataOffset"]))
        # This is the data item found at the class data location.
        classdata_item = getDataAt(classdata_location)
        classdata = dictify(classdata_item)
        # The class data datatypes in Ghidra only seem to parse the array
        # lengths for each array, so the entries that follow are walked here
        # one data item at a time.
        cursor = getDataAfter(classdata_item)
        for section in classdata:
            entry_count = bytes_to_int(classdata[section])
            handled = 0
            # A fresh "fields" iterator per section; within a section it is
            # advanced by every entry's diff, so its position accumulates.
            field_ids = get_tree("fields")
            while handled < entry_count:
                if section in ["instance_fields", "static_fields"]:
                    steps = bytes_to_int(dictify(cursor)["field_idx_diff"])
                    field_item = None
                    taken = 0
                    while taken < steps:
                        field_item = field_ids.next()
                        taken += 1
                    field_data = dictify(field_item)
                    type_str = get_string(
                        field_data["typeIndex"].getUnsignedValue(), typelist)
                    name_str = get_string(
                        field_data["nameIndex"].getUnsignedValue(), strlist)
                    declaration = "{} {};".format(type_str, name_str)
                    if section == "static_fields":
                        static_lines.append(declaration)
                    else:
                        instance_lines.append(declaration)
                # Every entry, field or not, consumes one data item.
                cursor = getDataAfter(cursor)
                handled += 1

    print("class {} {}".format(classname, "{"))
    for declaration in static_lines:
        print("\tstatic {}".format(declaration))
    print("")
    for declaration in instance_lines:
        print("\t{}".format(declaration))
    print("")
    for entry in method_lines:
        print(entry)
    print("}")
def __init__(self, dex_hdr, idx=None, address=None):
    """ Build a classdef object from either its index or its address.

    dex_hdr -- header object; its `classdefs`, `program` and `get_type`
               members are used here.
    idx     -- classdef index, only consulted when `address` is None.
    address -- address of the classdef item; wins over `idx` when given.

    Raises Exception when neither `idx` nor `address` is supplied, or when
    the class is an interface whose super type is not Ljava/lang/Object;.
    """
    if address is None and idx is None:
        raise Exception("Need either index or address for a classdef")

    SIZE_OF_CLASSDEF_ELEMENTS = 0x20

    self.dex_hdr = dex_hdr

    if address is None:
        # No address given: derive it from the index.
        idx = ghidra_utils.enforce_value(idx)
        address = dex_hdr.classdefs.add(idx * SIZE_OF_CLASSDEF_ELEMENTS)

    self.address = address
    self.ea = self.address

    classdef_dict = ghidra_utils.dictify(self.address,
                                         program=dex_hdr.program)

    self.name = dex_hdr.get_type(classdef_dict["classIndex"])
    self.super_class_type = dex_hdr.get_type(classdef_dict["superClassIndex"])

    self.interface_types = []
    interface_address = classdef_dict["interfacesOffset"]
    if interface_address != 0:
        interface_dict = ghidra_utils.dictify(interface_address,
                                              program=dex_hdr.program)
        num_interfaces = interface_dict["size"]
        for position in range(0, num_interfaces):
            entry = interface_dict["item_" + str(position)]
            self.interface_types.append(dex_hdr.get_type(entry["typeIndex"]))

    self.access_flags = classdef_dict["accessFlags"]

    # An interface is expected to have Object as its super type.
    if self.is_interface() and self.super_class_type != "Ljava/lang/Object;":
        raise Exception(
            "hey, found an interface with non-object super {} @ {} in {}, has {}"
            .format(self.name, self.address, self.dex_hdr.program,
                    self.super_class_type))

    classdata_address = classdef_dict["classDataOffset"]
    # A zero class data address is possible if it's a "marker interface";
    # in that case self.classdata is left unset.
    raw_classdata_address = ghidra_utils.enforce_raw_address(classdata_address)
    if 0 != raw_classdata_address:
        self.classdata = dex_classdata(dex_hdr, classdata_address)
def __init__(self, program=None):
    """ Create a header object, based on the dex component.

    program -- program to read from; falls back to `currentProgram` when
               omitted.

    The header is read at the address `ghidra_utils.get_address(0, ...)`
    returns; each section attribute is that address plus the matching
    offset entry of the header.
    """
    self.program = currentProgram if program is None else program

    base = ghidra_utils.get_address(0, program=self.program)
    header_dict = ghidra_utils.dictify(base, program=self.program)

    self.ea = base
    self.address = base

    def section_start(offset_key):
        # Address of a section: header address plus the recorded offset.
        return self.address.add(header_dict[offset_key])

    self.num_strings = header_dict["stringIdsSize"]
    self.str_ids = section_start("stringIdsOffset")

    self.types = section_start("typeIdsOffset")

    self.proto = section_start("protoIdsOffset")

    self.num_fields = header_dict["fieldIdsSize"]
    self.fields = section_start("fieldIdsOffset")

    self.num_methods = header_dict["methodIdsSize"]
    self.methods = section_start("methodIdsOffset")

    self.num_classdefs = header_dict["classDefsIdsSize"]
    self.classdefs = section_start("classDefsIdsOffset")

    self.data = section_start("dataOffset")

    # Starts empty; nothing in this constructor fills it.
    self.string_cache = None
def get_function_strings(function):
    """ Get a list of all of the strings used by a function (including duplicates).

    A reference counts as a string use when it is a data reference whose
    destination lies inside the "string_data" fragment of the program's
    first tree. Strings that are empty after stripping whitespace are
    skipped; the returned values are the stripped strings.
    """
    program = function.getProgram()
    tree_manager = program.getTreeManager()
    first_tree_name = tree_manager.getTreeNames()[0]
    strings_fragment = tree_manager.getFragment(first_tree_name, "string_data")

    found = []
    for reference in ghidra_utils.get_references_for_function(function):
        # Flow references are of no interest, only data references.
        if not reference.getReferenceType().isData():
            continue

        target = reference.getToAddress()
        # Skip destinations outside the strings section.
        if not strings_fragment.contains(target):
            continue

        text = ghidra_utils.dictify(target, program=program)["data"]
        # Drop the empty string and whitespace-only strings.
        text = text.strip()
        if text != "":
            found.append(text)

    return found
def __init__(self, dex_hdr, address):
    """ Parse the class data found at `address`.

    Fills `static_fields`, `instance_fields`, `direct_methods` and
    `virtual_methods` (in that order) by reading consecutive data items
    from the program listing, starting at `address`.
    """
    self.instance_fields = []
    self.static_fields = []
    self.virtual_methods = []
    self.direct_methods = []

    self.address = address
    self.ea = self.address
    self.dex_hdr = dex_hdr

    data_iterator = dex_hdr.program.getListing().getData(
        ghidra_utils.get_address(self.address), True)

    # Note: the first item only carries the entry count of each list; the
    # actual entries are treated as separate data items by Ghidra and are
    # pulled from the iterator one by one below.
    classdata_dict = ghidra_utils.dictify(data_iterator.next(),
                                          program=dex_hdr.program)

    def read_field(last_idx):
        # Consume one data item as an encoded field: (absolute index, field).
        encoded = encoded_dex_field(
            dex_hdr, data_iterator.next().getAddress(), last_idx)
        return encoded.field_idx, encoded.field

    def read_method(last_idx):
        # Consume one data item as an encoded method: (absolute index, method).
        encoded = encoded_dex_method(
            dex_hdr, data_iterator.next().getAddress(), last_idx)
        return encoded.method_idx, encoded.method

    # The order matters: entries are consumed from the iterator in exactly
    # this sequence.
    sections = (
        ("static_fields", read_field, self.static_fields),
        ("instance_fields", read_field, self.instance_fields),
        ("direct_methods", read_method, self.direct_methods),
        ("virtual_methods", read_method, self.virtual_methods),
    )
    for count_key, read_entry, destination in sections:
        # The previous index restarts at 0 for every list.
        running_idx = 0
        entry_count = decode_uleb128_bytes_to_int(classdata_dict[count_key])
        for _ in range(0, entry_count):
            running_idx, item = read_entry(running_idx)
            destination.append(item)
def __init__(self, dex_hdr, str_idx):
    """ Resolve the string with index `str_idx` through `dex_hdr`.

    The string id entry is located from `dex_hdr.str_ids`; its
    "stringDataOffset" is added to the header address to find the string
    data, whose "data" entry becomes `self.content`.
    """
    SIZE_OF_STRING_ELEMENTS = 4

    self.dex_hdr = dex_hdr
    program = dex_hdr.program

    index = ghidra_utils.enforce_value(str_idx)
    id_entry_addr = dex_hdr.str_ids.add(index * SIZE_OF_STRING_ELEMENTS)
    id_entry = ghidra_utils.dictify(id_entry_addr, program=program)

    # The data offset is applied relative to the header address.
    data_addr = dex_hdr.address.add(id_entry["stringDataOffset"])
    data_entry = ghidra_utils.dictify(data_addr, program=dex_hdr.program)

    self.ea = data_addr
    self.address = data_addr
    self.content = data_entry["data"]
def build_type_list(strlist):
    """ Map type ids to string-data addresses.

    For every item produced by get_tree("types"), the item's
    "descriptorIndex" is used to pick the matching entry of `strlist`
    (an existing string id -> address mapping). The result is a list in the
    order the type items are produced.
    """
    return [
        strlist[dictify(type_item)["descriptorIndex"].getUnsignedValue()]
        for type_item in get_tree("types")
    ]
def build_string_list():
    """ Return the "stringDataOffset" of every string item, in string id order.

    The list isn't too large, and building it once is much faster than
    iterating through all the code objects for every lookup.
    """
    return [
        dictify(string_item)["stringDataOffset"]
        for string_item in get_tree("strings")
    ]
def get_types_from_list(self, typelist_offset):
    """ Yield the type of every entry in the type list at `typelist_offset`.

    typelist_offset -- offset of the type list, relative to `self.address`.
                       An offset of 0 means "no list": nothing is yielded
                       and neither `self` nor the program is touched.

    This is a generator; each entry's "typeIndex" is resolved through
    `self.get_type`.
    """
    if typelist_offset != 0:
        type_list_addr = self.address.add(typelist_offset)
        type_list_dict = ghidra_utils.dictify(type_list_addr,
                                              program=self.program)

        num_types = type_list_dict["size"]
        # `range` rather than `xrange`: it exists on both Python 2 and 3.
        for i in range(num_types):
            type_index = type_list_dict["item_" + str(i)]["typeIndex"]
            yield self.get_type(type_index)
def get_supers(symbol, supers, interfaces, header, program):
    """ Collect super class and interface candidates for `symbol`.

    symbol     -- symbol (or namespace string) handed to get_classdef; the
                  part after its last "::" is appended to every collected
                  class path.
    supers     -- set, updated in place with "<super path>::<name>" strings;
                  the super class chain is followed recursively.
    interfaces -- set, updated in place with "<interface path>::<name>"
                  strings for every interface of each visited class.
    header     -- object whose get_type resolves a type index to a
                  descriptor string.
    program    -- program passed through to get_classdef and dictify.

    Returns the `supers` set, the same object that was passed in.
    """
    NO_SUPER_CLASS = 0xffffffff

    clazzdefaddr, clazzsymbol = get_classdef(symbol, program)
    if clazzdefaddr is None or clazzdefaddr == 0:
        return supers

    clazzdef = dictify(clazzdefaddr, program)
    member_name = str(symbol).split("::")[-1]

    if not clazzdef["superClassIndex"] == NO_SUPER_CLASS:
        superclazz = header.get_type(clazzdef["superClassIndex"])
        # Drop the first and last character of the descriptor and switch to
        # "::" separators.
        super_namespace = superclazz[1:-1].replace("/", "::")
        superstring = super_namespace + "::" + member_name
        if superstring not in supers:
            # Only recurse on a newly seen entry.
            supers.add(superstring)
            get_supers(super_namespace, supers, interfaces, header, program)

    if not clazzdef["interfacesOffset"] == 0:
        typelist = dictify(clazzdef["interfacesOffset"], program)
        for i in range(typelist["size"]):
            type_index = typelist["item_" + str(i)]["typeIndex"]
            typer = header.get_type(type_index)
            interfaces.add(typer[1:-1].replace("/", "::") + "::" + member_name)

    return supers
def get_type(self, type_index):
    """ Get a type by the type index.

    Reads the type id entry for `type_index` from `self.types`, then
    returns the string its "descriptorIndex" refers to, converted with
    str().
    """
    SIZE_OF_TYPE_ELEMENTS = 4

    index = ghidra_utils.enforce_value(type_index)
    entry_addr = self.types.add(index * SIZE_OF_TYPE_ELEMENTS)
    entry = ghidra_utils.dictify(entry_addr, program=self.program)
    descriptor_idx = entry["descriptorIndex"]

    return str(self.get_string(descriptor_idx))
def __init__(self, dex_hdr, address, previous_idx=0):
    """ Decode the encoded field stored at `address`.

    previous_idx is needed because the field indices are recorded as
    differences from the previous one: the absolute index is the decoded
    "field_idx_diff" plus `previous_idx`.
    """
    self.address = address
    self.ea = self.address
    self.dex_hdr = dex_hdr

    encoded_data = ghidra_utils.dictify(self.address,
                                        program=dex_hdr.program)

    idx_diff = decode_uleb128_bytes_to_int(encoded_data["field_idx_diff"])
    self.field_idx = idx_diff + previous_idx
    self.field = dex_field(dex_hdr, self.field_idx)

    raw_flags = encoded_data["accessFlags"]
    self.access_flags = decode_uleb128_bytes_to_int(raw_flags)
def __init__(self, dex_hdr, method_idx):
    """ Resolve the method with index `method_idx` through `dex_hdr`.

    Sets the owning class type (`clazz`), the prototype object
    (`prototype`) together with its `shorty`, and the method name as a str.
    """
    self.dex_hdr = dex_hdr

    index = ghidra_utils.enforce_value(method_idx)
    # SIZE_OF_METHOD_ELEMENTS is not defined in this method; it is taken
    # from the enclosing scope.
    entry_addr = dex_hdr.methods.add(index * SIZE_OF_METHOD_ELEMENTS)

    self.address = entry_addr
    self.ea = self.address

    method_dict = ghidra_utils.dictify(entry_addr, program=dex_hdr.program)

    self.clazz = dex_hdr.get_type(method_dict["classIndex"])

    self.prototype = dex_hdr.get_proto(method_dict["protoIndex"])
    self.shorty = self.prototype.shorty

    self.name = str(dex_hdr.get_string(method_dict["nameIndex"]))
def __init__(self, dex_hdr, proto_idx):
    """ Resolve the prototype with index `proto_idx` through `dex_hdr`.

    Sets `shorty` (string), `parameters` (list of types from the
    prototype's parameter type list) and `return_type`.
    """
    SIZE_OF_PROTO_ELEMENTS = 12

    self.dex_hdr = dex_hdr

    index = ghidra_utils.enforce_value(proto_idx)
    entry_addr = dex_hdr.proto.add(index * SIZE_OF_PROTO_ELEMENTS)

    self.ea = entry_addr
    self.address = self.ea

    proto_dict = ghidra_utils.dictify(entry_addr, program=dex_hdr.program)

    # Original author's note: these should be DWords, but only seem to work
    # as words...
    shorty_idx = proto_dict["shortyIndex"]
    return_idx = proto_dict["returnTypeIndex"]
    params_offset = proto_dict["parametersOffset"]

    self.shorty = dex_hdr.get_string(shorty_idx)
    # get_types_from_list is consumed eagerly into a list here.
    self.parameters = list(dex_hdr.get_types_from_list(params_offset))
    self.return_type = dex_hdr.get_type(return_idx)
def __init__(self, dex_hdr, field_idx):
    """ Resolve the field with index `field_idx` through `dex_hdr`.

    Sets the owning class type (`clazz`), the field's own type (`type`) and
    its name. The name is stored exactly as `dex_hdr.get_string` returns it.
    """
    SIZE_OF_FIELD_ELEMENTS = 8

    self.dex_hdr = dex_hdr

    index = ghidra_utils.enforce_value(field_idx)
    entry_addr = dex_hdr.fields.add(index * SIZE_OF_FIELD_ELEMENTS)

    self.address = entry_addr
    self.ea = self.address

    field_dict = ghidra_utils.dictify(entry_addr, program=dex_hdr.program)

    self.clazz = dex_hdr.get_type(field_dict["classIndex"])
    self.type = dex_hdr.get_type(field_dict["typeIndex"])
    self.name = dex_hdr.get_string(field_dict["nameIndex"])
def get_string(n, strlist):
    """ Return the string with string_id n using the address mapping strlist.

    `strlist[n]` is turned into an address of `currentProgram` via its str()
    form; the "data" entry of the data item at that address is returned.
    """
    address_text = str(strlist[n])
    location = currentProgram.getAddressFactory().getAddress(address_text)
    string_item = dictify(getDataAt(location))
    return string_item["data"]