def _read_struct_member(struct, sid, union, ea, offset, name, size, asobject):
    """Decode one struct member and store it in ``struct`` under ``name``.

    Helper for read_struct. The member flags are fetched with
    idc.GetMemberFlag(sid, offset); elements are then decoded one after the
    other with _read_struct_member_once until ``size`` bytes are consumed.
    A single decoded element is stored as-is, anything else as a list.
    """
    member_flags = idc.GetMemberFlag(sid, offset)
    assert member_flags != -1

    # Members that are themselves structs need the id and size of that struct.
    if idc.isStruct(member_flags):
        nested_sid = idc.GetMemberStrId(sid, offset)
        nested_size = idc.GetStrucSize(nested_sid)
    else:
        nested_sid = nested_size = None

    # Union members are read from ``ea`` itself; other members are displaced
    # by their offset.
    base = ea if union else ea + offset

    values = []
    consumed = 0
    while consumed < size:
        element, width = _read_struct_member_once(
            base + consumed, member_flags, size, nested_sid, nested_size,
            asobject)
        # The member must be made of a whole number of elements.
        assert size % width == 0
        values.append(element)
        consumed += width

    struct[name] = values[0] if len(values) == 1 else values
def getOrigDisasm(self):
    # type: () -> str
    """
    Gets the original disassembly without any further applied transformations.
    However, the formatting is different from the original and is more
    convenient for parsing.

    Code and otherwise unclassified items use IDA's disassembly text with
    comments filtered out. Struct, align and data items are delegated to
    _getStructDisasm, _convertAlignDisasm and _getDataDisasm. ASCII items are
    rendered as .ascii/.asciz directives built from getContent().

    If the item's comment contains '<force> ', everything after that marker
    replaces the computed disassembly.

    :return: the disassembly
    """
    # Spelled as a product so the two-space pattern stays unambiguous.
    doubleSpace = ' ' * 2

    flags = idc.GetFlags(self.ea)
    if idc.isCode(flags):
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        disasm = disasm.replace(doubleSpace, ' ')
    elif idc.isStruct(flags):
        disasm = self._getStructDisasm()
    elif idc.isAlign(flags):
        disasm = idc.GetDisasm(self.ea)
        disasm = self._convertAlignDisasm(disasm)
    elif idc.isASCII(flags):
        content = self.getContent()
        newLinesLeft = content.count(0x0A)
        # Multi-line strings are emitted as a run of .ascii directives.
        parts = ['.ascii "' if newLinesLeft > 1 else '.asciz "']
        for byte in content:
            char = chr(byte)
            if byte == 0x00:
                parts.append('"')
            elif char == '"':
                parts.append('\\\"')
            elif char == '\\':
                parts.append('\\\\')
            elif byte == 0x0A:
                parts.append('\\n')
                newLinesLeft -= 1
                # Start a fresh directive after every embedded newline.
                if newLinesLeft > 1:
                    parts.append('"\n\t.ascii "')
                elif newLinesLeft == 1:
                    parts.append('"\n\t.asciz "')
            elif char == ' ':
                parts.append(' ')
            elif not char.isspace():
                parts.append(char)
            else:
                # TODO [INVALID] arm-none-eabi doesn't recognize \xXX? \x seems to become a byte.
                parts.append('\\x%02X' % byte)
        disasm = ''.join(parts)
    elif idc.isData(flags):
        disasm = self._getDataDisasm()
    else:
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        disasm = disasm.replace(doubleSpace, ' ')

    # parse force command: search for the same marker that is sliced on, so a
    # bare '<force>' without a trailing space no longer raises ValueError.
    comment = self.getComment() or ''
    _, marker, forced = comment.partition('<force> ')
    if marker:
        disasm = forced

    return disasm
def isDefined(ea):
    """Return True when ``ea`` is flagged as both a struct and an item head."""
    flags = idaapi.getFlags(ea)
    # TODO: verify the actual struct type.
    return bool(idc.isStruct(flags) and idc.isHead(flags))
def isDefined(ea):
    """Return True when ``ea`` is a struct head carrying the expected name.

    The address must be flagged as a struct and as an item head, and the name
    IDA reports for it must equal TypeDescriptor.makeName(ea).
    """
    flags = idaapi.getFlags(ea)
    if not (idc.isStruct(flags) and idc.isHead(flags)):
        return False
    actual_name = idaapi.get_name(idaapi.BADADDR, ea)
    return actual_name == TypeDescriptor.makeName(ea)
def getOrigDisasm(self):
    # type: () -> str
    """
    Gets the original disassembly without any further applied transformations.
    However, the formatting is different from the original and is more
    convenient for parsing.

    Struct items yield the literal "INVALID", align items IDA's disassembly
    text, and data items the result of _getDataDisasm. Everything else uses
    IDA's disassembly text with comments filtered out and runs of spaces
    collapsed to a single space.

    :return: the disassembly
    """
    flags = idc.GetFlags(self.ea)
    if idc.isStruct(flags):
        disasm = "INVALID"
    elif idc.isAlign(flags):
        disasm = idc.GetDisasm(self.ea)
    elif idc.isData(flags):
        disasm = self._getDataDisasm(self.ea)
    else:
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        # Collapse runs of spaces. The pattern must be TWO spaces: testing for
        # and replacing a single space would never terminate. It is spelled as
        # a product so it stays unambiguous.
        doubleSpace = ' ' * 2
        while doubleSpace in disasm:
            disasm = disasm.replace(doubleSpace, ' ')
    return disasm
def getTypeName(self):
    # type: () -> str
    """
    :return: the type of the data item, if it's a struct/enum/const, the name of it.
             a number of stars can follow, indicating that it's a pointer.
             "INVALID" is returned when no type could be determined.
    """
    flags = idc.GetFlags(self.ea)

    if idc.isCode(flags):
        return "code"

    # Struct items must be tested before the generic data case, otherwise they
    # are handled as an array of unknown type and never reach this branch.
    if idc.isStruct(flags):
        # NOTE(review): presumably idc.get_type yields the struct's name for a
        # struct item -- confirm. Always return a string, never a function.
        declaredType = idc.get_type(self.ea)
        return declaredType if declaredType else "INVALID"

    if idc.isData(flags):
        size = self.getSize()
        if idc.is_byte(flags) and size == 1:
            return "u8"
        if idc.is_word(flags) and size == 2:
            return "u16"
        if idc.is_dword(flags) and size == 4:
            return "void*" if self.isPointer(self.getContent()) else "u32"

        # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
        # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
        tokens = [tok for tok in re.split('[ ,]', idc.GetDisasm(self.ea)) if tok]
        dataType = tokens[0] if tokens else None
        if dataType == "DCB":
            return "u8[%d]" % size
        if dataType == "DCW":
            return "u16[%d]" % (size // 2)
        if dataType == "DCD":
            if self.hasPointer():
                return "void*[%d]" % (size // 4)
            return "u32[%d]" % (size // 4)
        return "INVALID"

    if idc.isUnknown(flags):
        return "u8"

    return "INVALID"
def _read_struct_member_once(ea, flags, size, member_sid, member_size, asobject):
    """Decode a single element of a struct member for _read_struct_member.

    Returns a ``(value, bytes_read)`` pair. Flags that match none of the
    handled kinds produce ``(None, size)``.
    """
    # Fixed-width integers: flag predicate name -> width in bytes. Predicates
    # are looked up lazily, in the same order as they are tested.
    integer_kinds = (
        ("isByte", 1),
        ("isWord", 2),
        ("isDwrd", 4),
        ("isQwrd", 8),
        ("isOwrd", 16),
    )
    for predicate_name, width in integer_kinds:
        if getattr(idc, predicate_name)(flags):
            return read_word(ea, width), width

    if idc.isASCII(flags):
        # Strings are read in one piece covering the whole member.
        return idc.GetManyBytes(ea, size), size
    if idc.isFloat(flags):
        return idc.Float(ea), 4
    if idc.isDouble(flags):
        return idc.Double(ea), 8
    if idc.isStruct(flags):
        nested = read_struct(ea, sid=member_sid, asobject=asobject)
        return nested, member_size

    return None, size
def make_struc_member(self, object_version, address, member_type=ya.OBJECT_TYPE_STRUCT_MEMBER):
    """Create or update the struct member described by ``object_version``.

    The parent struct is resolved through ``self.struc_ids``; if it is not
    known the call returns without doing anything. ``address`` is used as the
    member offset. The member is then typed as a sub-struct, an enum or a
    plain field depending on its flags, its header comments are applied, and
    its id is recorded in ``self.strucmember_ids`` when the object is a struct
    member.

    :param object_version: object describing the member (size, name, flags,
                           xrefs, comments)
    :param address: offset of the member inside its parent struct
    :param member_type: not used by this method
    """
    struc_object_id = object_version.get_parent_object_id()

    try:
        struc_id = self.struc_ids[struc_object_id]
    except KeyError:
        # Parent struct has not been created: nothing to attach the member to.
        return
    is_union = struc_id in self.union_ids

    offset = address

    if is_union:
        last_offset = idc.GetLastMember(struc_id)
        if last_offset == idc.BADADDR:
            last_offset = -1
        if last_offset < offset:
            # ensure that 'offset' fields are present
            for i in xrange(last_offset + 1, offset + 1):
                idc.AddStrucMember(struc_id, "yaco_filler_%d" % i, 0,
                                   idc.FF_BYTE | idc.FF_DATA, -1, 1)

    member_size = object_version.get_size()
    member_name = object_version.get_name()

    flags = object_version.get_object_flags()
    if idc.isStruct(flags):
        # if the sub field is a struct, it must have a single Xref field with the struct object id
        try:
            sub_struc_object_id = object_version.getXRefIdsAt(0, 0)[0]
            sub_struc_id = self.struc_ids[sub_struc_object_id]

            sub_struc_size = idc.GetStrucSize(sub_struc_id)
            if sub_struc_size == 0:
                logger.error(
                    "%20s: adding sub member at offset 0x%08X, size=0x%08X "
                    "(sub=0x%.016X, size=0x%08X) with name %s : sub struc size is ZERO" %
                    (idc.GetStrucName(struc_id), offset, member_size, sub_struc_id,
                     sub_struc_size, object_version.get_name()))
            else:
                nitems = member_size // sub_struc_size
                YaToolIDATools.SetStrucmember(struc_id, member_name, offset, flags,
                                              sub_struc_id, nitems)
        except KeyError:
            logger.error(
                "Error while looking for sub struc in struc %s, offset 0x%08X (field name='%s')" %
                (self.hash_provider.hash_to_string(struc_object_id), offset,
                 object_version.get_name()))
            traceback.print_exc()

    elif idc.isEnum0(flags):
        # an enum is applied here
        try:
            sub_enum_object_id = object_version.getXRefIdsAt(0, 0)[0]
            sub_enum_id = self.enum_ids[sub_enum_object_id]

            name_ok = idc.SetMemberName(struc_id, offset, member_name)
            if name_ok is not True:
                # Arguments match the six placeholders and report the enum id.
                logger.debug(
                    "Error while setting member name (enum) : "
                    "(struc=%s, member=%s, offset=0x%08X, mflags=%d, msize=%d, tid=0x%016X" %
                    (idc.GetStrucName(struc_id), member_name, offset, flags,
                     member_size, sub_enum_id))
            else:
                sub_enum_size = idc.GetEnumWidth(sub_enum_id)
                if sub_enum_size == 0:
                    # No explicit enum width: treat the member as one item.
                    sub_enum_size = member_size

                nitems = member_size // sub_enum_size
                ret = idc.SetMemberType(struc_id, offset, flags, sub_enum_id, nitems)
                if ret == 0:
                    logger.debug(
                        "Error while setting member type (enum) : "
                        "(struc=%s, member=%s, offset=0x%08X, mflags=%d, msize=%d, tid=0x%016X" %
                        (idc.GetStrucName(struc_id), member_name, offset, flags,
                         member_size, sub_enum_id))
        except KeyError:
            logger.error(
                "Error while looking for sub enum in struc %s, offset 0x%08X (field name='%s')" %
                (struc_object_id, offset, member_name))
            traceback.print_exc()

    else:
        tid = -1
        if idc.isASCII(flags):
            logger.debug(
                "object: %s : %s" %
                (self.hash_provider.hash_to_string(object_version.get_id()),
                 object_version.get_name()))
            try:
                tid = object_version.get_string_type()
            except KeyError:
                tid = idc.ASCSTR_C

        name_ok = idc.SetMemberName(struc_id, offset, member_name)
        if name_ok is not True:
            logger.debug(
                "Error while setting member name :" +
                " (struc_id=0x%08X, struc=%s, member=%s, offset=0x%08X, mflags=%d, msize=%d)" %
                (struc_id, idc.GetStrucName(struc_id), member_name, offset, flags,
                 member_size))
        else:
            item_size = YaToolIDATools.get_field_size(flags, tid)
            nitems = member_size // item_size
            # IDA BUG : 4-byte chars are stored as 2 double words, thus we must
            # multiply nitem by 2!
            # NOTE(review): no such adjustment is made below -- confirm whether
            # it is still required.
            ret = idc.SetMemberType(struc_id, offset, flags, tid, nitems)
            if ret == 0:
                logger.debug(
                    "Error while setting member type :" +
                    " (struc=%s, member=%s, offset=0x%08X, mflags=%d, msize=%d)" %
                    (idc.GetStrucName(struc_id), member_name, offset, flags,
                     member_size))

    # Header comments are optional: a missing one is reported as KeyError.
    try:
        repeatable_headercomment = self.sanitize_comment_to_ascii(
            object_version.get_header_comment(True))
        idc.SetMemberComment(struc_id, offset, repeatable_headercomment, 1)
    except KeyError:
        pass

    try:
        nonrepeatable_headercomment = self.sanitize_comment_to_ascii(
            object_version.get_header_comment(False))
        idc.SetMemberComment(struc_id, offset, nonrepeatable_headercomment, 0)
    except KeyError:
        pass

    member_id = idc.GetMemberId(struc_id, offset)
    self.set_struct_member_type(object_version, member_id)
    if object_version.get_type() == ya.OBJECT_TYPE_STRUCT_MEMBER:
        self.strucmember_ids[object_version.get_id()] = member_id
def make_data(self, object_version, address):
    """Define the data item described by ``object_version`` at ``address``.

    A size of 0 (or a missing size) undefines the item. Without flags the
    item is created as bytes. Otherwise ASCII items are created as strings,
    struct items are created from every xref'd object id known in
    ``self.struc_ids``, and any non-struct item is created with
    idc.MakeData. Finally the name and type of the object are applied.

    :param object_version: object describing the data (size, flags, xrefs)
    :param address: address at which the data is defined
    """
    try:
        size = object_version.get_size()
    except KeyError:
        size = 0
    try:
        flags = object_version.get_object_flags()
    except KeyError:
        flags = None

    if size == 0:
        idc.MakeUnkn(address, idc.DOUNK_EXPAND)
    elif flags is None:
        idc.MakeData(address, idc.FF_BYTE, size, 0)
    else:
        if idc.isASCII(flags):
            try:
                str_type = object_version.get_string_type()
                YaToolIDATools.check_and_set_str_type(str_type)
            except KeyError:
                pass
            idc.MakeStr(address, address + size)
            idc.SetFlags(address, flags)

        if idc.isStruct(flags):
            found = False
            has_xref = False
            for _, xref_id_attr in object_version.get_xrefed_id_map().iteritems():
                for xref_hash, _xref_attrs in xref_id_attr:
                    has_xref = True
                    if xref_hash not in self.struc_ids:
                        continue
                    struc_id = self.struc_ids[xref_hash]
                    if DEBUG_EXPORTER:
                        logger.debug(
                            "making unknown from 0x%08X to 0x%08X" %
                            (address, address + size))
                    idaapi.do_unknown_range(address, size, idc.DOUNK_DELNAMES)

                    # idc.MakeStructEx uses idaapi.doStruct (but asks for name while
                    # we already have the struc id)
                    if DEBUG_EXPORTER:
                        logger.debug(
                            "Making struc at %s : %s (sizeof(%s)=0x%08X, size=0x%08X, flags=0x%08X" %
                            (self.yatools.address_to_hex_string(address),
                             self.yatools.address_to_hex_string(struc_id),
                             idaapi.get_struc_name(struc_id),
                             idc.GetStrucSize(struc_id), size, flags))
                    idc.SetCharPrm(idc.INF_AUTO, True)
                    idc.Wait()
                    if idaapi.doStruct(address, size, struc_id) == 0:
                        if DEBUG_EXPORTER:
                            logger.warning("Making struc failed")
                    idc.SetCharPrm(idc.INF_AUTO, False)
                    found = True

            # Report a missing xref only when none was seen. An ``else`` on a
            # ``for`` loop without ``break`` would log it on every call.
            if not has_xref:
                logger.error(
                    "bad struc flags : idc.isStruct is true but no xref available for object %s" %
                    self.hash_provider.hash_to_string(object_version.get_id()))
            if not found:
                logger.error(
                    "bad struc flags : idc.isStruct is true "
                    "but no struc available for object %s (%s)" %
                    (self.hash_provider.hash_to_string(object_version.get_id()),
                     object_version.get_name()))
        else:
            idc.MakeData(address, flags & (idc.DT_TYPE | idc.MS_0TYPE), size, 0)

    self.make_name(object_version, address, False)
    self.set_type(object_version, address)
def is_struct(self):
    """Return the result of idc.isStruct applied to this object's flags."""
    item_flags = self.flags
    return idc.isStruct(item_flags)
def getContent(self, bin=False):
    """
    reads bytes at the EA of the data item and constructs its content
    representation based on its type

    :param bin: if True, array of bytes is always passed back
    :return: for code and u8/u16/u32 data, the first element produced by
             _combineBytes (or the list of byte values when ``bin`` is set);
             for other data, the result of _combineBytes over the whole item
             (or the list of byte values); for unknown items, the single byte
             value (wrapped in a list when ``bin`` is set); -1 for struct
             items and for anything that matches no case.
    """
    flags = idc.GetFlags(self.ea)
    output = -1

    if idc.isCode(flags):
        # an instruction is also data, its bytes are gathered and combined into one integer
        size = self.getSize()
        # bytearray yields integer byte values on both Python 2 and Python 3.
        rawBytes = list(bytearray(idc.get_bytes(self.ea, size)))
        # either return one discrete instruction int, or an array of bytes representing it
        output = rawBytes if bin else self._combineBytes(rawBytes, size)[0]

    elif idc.isStruct(flags):
        # Struct items are not decoded; the -1 default is returned.
        pass

    elif idc.isData(flags):
        size = self.getSize()
        isPrimitive = idc.is_data(flags) and (
            idc.is_byte(flags) and size == 1
            or idc.is_word(flags) and size == 2
            or idc.is_dword(flags) and size == 4)
        if isPrimitive:
            # normal case, build up a u8, u16, or u32
            rawBytes = list(bytearray(idc.get_bytes(self.ea, size)))
            # either return one discrete primitive, or the array of bytes representing it
            output = rawBytes if bin else self._combineBytes(rawBytes, size)[0]
        else:
            # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
            # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
            tokens = [tok for tok in re.split('[ ,]', idc.GetDisasm(self.ea)) if tok]
            dataType = tokens[0] if tokens else None

            # Grab all of the bytes in the array
            rawBytes = list(bytearray(
                idc.get_bytes(self.ea, idc.get_item_size(self.ea))))

            # figure out datatype to convert the array to be of;
            # if type unknown, just show it as an array of bytes
            bytesPerElem = {'DCB': 1, 'DCW': 2, 'DCD': 4}.get(dataType, 1)

            # create new array with correct type, or just return the bytes
            output = rawBytes if bin else self._combineBytes(rawBytes, bytesPerElem)

    elif idc.isUnknown(flags):
        # unknown data elements are always 1 byte in size!
        output = ord(idc.get_bytes(self.ea, 1))
        if bin:
            output = [output]

    return output