def datify(self): n = 0 ea = self.get_start_ea(self.DATA) print "\nLooking for possible strings starting at: %s:0x%X" % (idc.SegName(ea), ea) for s in idautils.Strings(): if s.ea > ea: if not idc.isASCII(idc.GetFlags(s.ea)) and idc.MakeStr(s.ea, idc.BADADDR): n += 1 print "Created %d new ASCII strings" % n print "Converting remaining data to DWORDs...", while ea != idc.BADADDR: flags = idc.GetFlags(ea) if idc.isUnknown(flags) or idc.isByte(flags): idc.MakeDword(ea) idc.OpOff(ea, 0, 0) ea = idc.NextAddr(ea) print "done.\n"
def disassemble_new_targets(self, enabled): for value in self.results.values: flag = idc.GetFlags(value) if not idc.isCode(flag) and idc.isUnknown(flag): res = idc.MakeCode(value) if res == 0: print "Try disassemble at:" + hex(value) + " KO" #TODO: Rollback ? else: print "Try disassemble at:" + hex(value) + " Success !"
def type_to_string(self, t):
    """
    Map an IDA flags value to a short label.

    :param t: flags value (as accepted by the idc.is* predicates)
    :return: "C" for code, "D" for data, "T" for tail, "Ukn" for unknown,
             "Err" when none of the predicates match
    """
    # Checked in order; the first matching predicate wins.
    labelled_checks = (
        (idc.isCode, "C"),
        (idc.isData, "D"),
        (idc.isTail, "T"),
        (idc.isUnknown, "Ukn"),
    )
    for matches, label in labelled_checks:
        if matches(t):
            return label
    return "Err"
def unk2ArrRng(start_ea, end_ea):
    """
    Walk the data items from start_ea up to (not including) end_ea and try to
    convert every named, unknown item into an array via unk2Arr.

    :param start_ea: address of the first item to examine
    :param end_ea: address at which the walk stops
    """
    item = Data.Data(start_ea)
    while item.ea < end_ea:
        named_unknown = item.getName() and idc.isUnknown(idc.GetFlags(item.ea))
        if named_unknown:
            label = item.getName()
            if unk2Arr(item.ea):
                print('%s -> %s' % (label, item.getName()))
                # Re-read the item so the step below uses its size after conversion.
                item = Data.Data(item.ea)
        item = Data.Data(item.ea + item.getSize())
def renameDword(self):
    """
    Name every DWORD in the database whose value equals the address of the
    import selected in ``self._import_table``.

    The import address is read from column 3 and the import name from column 2
    of the current row. The database is searched from idc.MinEA() downwards
    for the little-endian byte pattern of the address. Each hit is (re)defined
    as a DWORD where needed and, if it has no name or only an auto-generated
    ``off_``/``dword_`` name, renamed to the import name. When the plain name
    is rejected, the suffixes 0..9 are tried. Column 5 of the current row is
    extended with each name that was applied.

    Errors are logged, not raised.
    """
    row = self._import_table.currentRow()
    proc_addr = self._import_table.item(row, 3).text()
    proc_name = str(self._import_table.item(row, 2).text())
    renamed = 0
    if proc_addr:
        try:
            proc_addr = int(proc_addr, 16)
            # Search pattern: the address as four space-separated hex bytes, little-endian.
            proc_bin_str = " ".join([x.encode("hex") for x in struct.pack("<I", proc_addr)])
            next_dword = idc.FindBinary(idc.MinEA(), idc.SEARCH_DOWN | idc.SEARCH_NEXT, proc_bin_str)
            while next_dword != idc.BADADDR:
                log.debug("Trying to fix-up 0x{:08x}".format(next_dword))
                # DWORDs can be "inaccessible" for many reasons and it requires "breaking up" the data blobs
                # and manually fixing them

                # The idc.is* predicates take a flags value, not an address. The hit may
                # lie inside a larger item, so classify it by the flags of the item head.
                hd = idc.ItemHead(next_dword)
                flags = idc.GetFlags(hd)

                # Reason 1: In a dword array in an unknown section
                if idc.isUnknown(flags):
                    idc.MakeUnkn(next_dword, idc.DOUNK_EXPAND)
                    idc.MakeDword(next_dword)
                # Reason 2: In a dword array in a data section
                elif idc.isData(flags):
                    idc.MakeDword(hd)
                    idc.MakeDword(next_dword)
                # Reason 3: In a dword array in a code section (validate via "dd <dword>,")
                elif idc.isCode(flags) and idc.GetDisasm(next_dword).startswith("dd "):
                    idc.MakeDword(hd)
                    idc.MakeDword(next_dword)

                # Only rename when the location is unnamed or carries an auto-generated name
                current_name = idc.Name(next_dword)
                if current_name.startswith(("off_", "dword_")) or current_name == "":
                    success = idc.MakeNameEx(next_dword, proc_name, idc.SN_NOWARN | idc.SN_NON_AUTO)
                    i = 0
                    new_proc_name = proc_name
                    # Plain name rejected: retry with numeric suffixes 0..9
                    while not success and i < 10:
                        new_proc_name = "{}{}".format(proc_name, i)
                        success = idc.MakeNameEx(next_dword, new_proc_name, idc.SN_NOWARN | idc.SN_NON_AUTO)
                        i += 1
                    if success:
                        renamed += 1
                        item = self._import_table.item(row, 5)
                        item.setText("{}, {}".format(str(item.text()), new_proc_name))
                        log.debug("DWORD @ 0x{:08x} now has name {}".format(next_dword, new_proc_name))
                    else:
                        log.error("Unable to auto-rename successfully, terminating search")
                        break
                else:
                    log.debug("Value at 0x{:08x} does not meet renaming requirements".format(next_dword))
                # Continue searching after the DWORD that was just handled
                next_dword = idc.FindBinary(next_dword + 4, idc.SEARCH_DOWN | idc.SEARCH_NEXT, proc_bin_str)
        except Exception as e:
            log.error("Error encountered: {}".format(e))
    log.debug("Renamed {:d} instances of {}".format(renamed, proc_name))
def renameDword(self):
    """
    Name every DWORD in the database whose value equals the address of the
    import selected in ``self._import_table``.

    The import address is read from column 3 and the import name from column 2
    of the current row. The database is searched from idc.MinEA() downwards
    for the little-endian byte pattern of the address. Each hit is (re)defined
    as a DWORD where needed and, if it has no name or only an auto-generated
    ``off_``/``dword_`` name, renamed to the import name. When the plain name
    is rejected, the suffixes 0..9 are tried. Column 5 of the current row is
    extended with each name that was applied.

    Errors are logged, not raised.
    """
    row = self._import_table.currentRow()
    proc_addr = self._import_table.item(row, 3).text()
    proc_name = str(self._import_table.item(row, 2).text())
    renamed = 0
    if proc_addr:
        try:
            proc_addr = int(proc_addr, 16)
            # Search pattern: the address as four space-separated hex bytes, little-endian.
            proc_bin_str = " ".join([x.encode("hex") for x in struct.pack("<I", proc_addr)])
            next_dword = idc.FindBinary(idc.MinEA(), idc.SEARCH_DOWN | idc.SEARCH_NEXT, proc_bin_str)
            while next_dword != idc.BADADDR:
                log.debug("Trying to fix-up 0x{:08x}".format(next_dword))
                # DWORDs can be "inaccessible" for many reasons and it requires "breaking up" the data blobs
                # and manually fixing them

                # The idc.is* predicates take a flags value, not an address. The hit may
                # lie inside a larger item, so classify it by the flags of the item head.
                hd = idc.ItemHead(next_dword)
                flags = idc.GetFlags(hd)

                # Reason 1: In a dword array in an unknown section
                if idc.isUnknown(flags):
                    idc.MakeUnkn(next_dword, idc.DOUNK_EXPAND)
                    idc.MakeDword(next_dword)
                # Reason 2: In a dword array in a data section
                elif idc.isData(flags):
                    idc.MakeDword(hd)
                    idc.MakeDword(next_dword)
                # Reason 3: In a dword array in a code section (validate via "dd <dword>,")
                elif idc.isCode(flags) and idc.GetDisasm(next_dword).startswith("dd "):
                    idc.MakeDword(hd)
                    idc.MakeDword(next_dword)

                # Only rename when the location is unnamed or carries an auto-generated name
                current_name = idc.Name(next_dword)
                if current_name.startswith(("off_", "dword_")) or current_name == "":
                    success = idc.MakeNameEx(next_dword, proc_name, idc.SN_NOWARN | idc.SN_NON_AUTO)
                    i = 0
                    new_proc_name = proc_name
                    # Plain name rejected: retry with numeric suffixes 0..9
                    while not success and i < 10:
                        new_proc_name = "{}{}".format(proc_name, i)
                        success = idc.MakeNameEx(next_dword, new_proc_name, idc.SN_NOWARN | idc.SN_NON_AUTO)
                        i += 1
                    if success:
                        renamed += 1
                        item = self._import_table.item(row, 5)
                        item.setText("{}, {}".format(str(item.text()), new_proc_name))
                        log.debug("DWORD @ 0x{:08x} now has name {}".format(next_dword, new_proc_name))
                    else:
                        log.error("Unable to auto-rename successfully, terminating search")
                        break
                else:
                    log.debug("Value at 0x{:08x} does not meet renaming requirements".format(next_dword))
                # Continue searching after the DWORD that was just handled
                next_dword = idc.FindBinary(next_dword + 4, idc.SEARCH_DOWN | idc.SEARCH_NEXT, proc_bin_str)
        except Exception as e:
            log.error("Error encountered: {}".format(e))
    log.debug("Renamed {:d} instances of {}".format(renamed, proc_name))
def collapseUnknowns(start_ea, end_ea, verbose=True):
    """
    Repeatedly locates the next unknown data element and calls ops.arrTillRef on it,
    until the located address is no longer below end_ea.

    :param start_ea: range start for collapsing
    :param end_ea: range end for collapsing
    :param verbose: if True, print a line for every address that is collapsed
    :return: True if at least one address was processed, False otherwise
    """
    def _is_unknown(addr):
        return idc.isUnknown(idc.GetFlags(addr))

    processed_any = False
    cursor = next.byDataElement(start_ea, _is_unknown, ui=False)
    while cursor < end_ea:
        if verbose:
            print('%07X: make array till reference/name' % cursor)
        ops.arrTillRef(cursor)
        processed_any = True
        cursor = next.byDataElement(cursor, _is_unknown, ui=False)
    return processed_any
def unksToArrs(start_ea, end_ea):
    """
    Walks the data items in [start_ea, end_ea). Whenever an item's content is a
    single value that is accepted by isPointer and refers to an unknown address,
    that address is printed and passed to arrTillName.

    :param start_ea: address of the first item to examine
    :param end_ea: address at which the walk stops
    """
    cursor = start_ea
    while cursor < end_ea:
        item = Data.Data(cursor)
        cursor += item.getSize()
        target = item.getContent()
        # List content (arrays) is skipped; only a single value can be a pointer here.
        if (type(target) != list
                and idc.isUnknown(idc.GetFlags(target))
                and item.isPointer(target)):
            print('%07X' % target)
            arrTillName(target)
def unk2Arr(ea):
    """
    Converts the item at ea with arrTillName, provided it is named, passes the
    isPointer check and every byte up to the next name is unknown.

    :param ea: address of the item to convert
    :return: True if arrTillName was called, False if a precondition failed
    """
    item = Data.Data(ea)
    if not (item.getName() and item.isPointer(item.ea)):
        return False

    # ensure that it's all unknowns till next name
    cursor = ea
    while True:
        if not idc.isUnknown(idc.GetFlags(cursor)):
            return False
        cursor += 1
        if idc.Name(cursor):
            break

    arrTillName(ea)
    return True
def testNoUnknowns(self):
    """
    For every segment, reports a failure for each head that is flagged unknown
    and asserts that each head lies exactly one byte after the previous one.
    """
    for seg_start in idautils.Segments():
        seg_end = idc_bc695.SegEnd(seg_start)
        # Seeded so that the first head is expected at the segment start.
        previous = seg_start - 1
        for current in idautils.Heads(seg_start, seg_end):
            # confirm not unknown
            if idc.isUnknown(idc.GetFlags(current)):
                Test.fail("Detected Unknown @ %08X" % current)
            # each head must directly follow the previous head (one byte later)
            # NOTE(review): this only holds when every item is 1 byte long — confirm intent
            Test.assertEquals(current, previous + 1,
                              "Non-continuous heads: %08X -> %08X" % (previous, current))
            # remember current head for the next iteration
            previous = current
def getDisasm(self):
    """
    :return: the original disassembly of this item, passed through the converter that
             matches the item's kind (align / data or unknown / code) and then through
             the tab conversion, so that it's functional with the gcc assembler
    """
    item_flags = idc.GetFlags(self.ea)
    text = self.getOrigDisasm()

    if idc.isAlign(item_flags):
        text = self._convertAlignDisasm(text)
    elif idc.isData(item_flags) or idc.isUnknown(item_flags):
        text = self._convertData(text)
    elif idc.isCode(item_flags):
        # convert the instruction, then make the code lower case
        text = self._lowerCode(self._convertCode(self.ea, text))

    return self._convertTabs(text)
def datify(self): ea = self.get_start_ea(self.DATA) if ea == idc.BADADDR: ea = idc.FirstSeg() print "Converting remaining data to DWORDs...", while ea != idc.BADADDR: flags = idc.GetFlags(ea) if idc.isUnknown(flags) or idc.isByte(flags): idc.MakeDword(ea) idc.OpOff(ea, 0, 0) ea = idc.NextAddr(ea) print "done."
def datify(self): ea = self.get_start_ea(self.DATA) if ea == idc.BADADDR: ea = idc.FirstSeg() print "Converting remaining data to DWORDs...", while ea != idc.BADADDR: flags = idc.GetFlags(ea) if (idc.isUnknown(flags) or idc.isByte(flags)) and ((ea % 4) == 0): idc.MakeDword(ea) idc.OpOff(ea, 0, 0) ea = idc.NextAddr(ea) print "done."
def nextknown(self, ea, ui=True):
    """
    Finds the next data item after the one at ea that is not flagged unknown,
    searching up to self.end_ea
    :param ea: ea to start searching from; the item at ea itself is not considered
    :param ui: if True, jump to the address where the search stopped
    :return: hex formatted ea of the item found, or hex formatted idaapi.BADADDR if none was found
    """
    # don't count this item
    first = Data.Data(ea)
    cursor = first.ea + first.getSize()

    found = idaapi.BADADDR
    while cursor < self.end_ea:
        item = Data.Data(cursor)
        if idc.isUnknown(item._getFlags()):
            cursor += item.getSize()
            continue
        found = cursor
        break

    if ui:
        idaapi.jumpto(cursor)
    return '%07X' % found
def datify(self):
    """
    Convert every 4-byte-aligned unknown or byte item, from the data start
    address onwards, into a DWORD and apply idc.OpOff(ea, 0, 0) to it, then
    run self._fix_data_offsets().

    When no data start address is available (idc.BADADDR), the walk begins at
    idc.FirstSeg() instead. Progress is reported through self.say.
    """
    data_start = self.get_start_ea(self.DATA)
    cursor = idc.FirstSeg() if data_start == idc.BADADDR else data_start

    self.say("Converting remaining data to DWORDs...")

    while cursor != idc.BADADDR:
        item_flags = idc.GetFlags(cursor)
        convertible = idc.isUnknown(item_flags) or idc.isByte(item_flags)
        # Only addresses that are a multiple of 4 are turned into DWORDs.
        if convertible and (cursor % 4) == 0:
            idc.MakeDword(cursor)
            idc.OpOff(cursor, 0, 0)
        cursor = idc.NextAddr(cursor)

    self.say("done.")
    self._fix_data_offsets()
def getTypeName(self):
    # type: () -> str
    """
    :return: a short type name for the data item at self.ea:
             "code" for instructions; "u8"/"u16"/"u32" for single bytes/words/dwords
             ("void*" when the dword content passes isPointer); "u8[n]"/"u16[n]"/"u32[n]"/
             "void*[n]" for arrays, derived from the DCB/DCW/DCD directive on the first
             disassembly line; "u8" for unknown bytes; for struct items the type string
             reported by idc.get_type; "INVALID" when nothing applies.
    """
    flags = idc.GetFlags(self.ea)

    if idc.isCode(flags):
        return "code"
    if idc.isUnknown(flags):
        return "u8"
    if idc.isStruct(flags):
        # Struct items also satisfy isData(), so this must be tested before the
        # generic data handling below or it can never be reached.
        # NOTE(review): assumes idc.get_type returns the struct's declared type string
        # when IDA has one recorded — confirm. Falls back to "INVALID" otherwise.
        return idc.get_type(self.ea) or "INVALID"
    if not idc.isData(flags):
        return "INVALID"

    size = self.getSize()
    if idc.is_byte(flags) and size == 1:
        return "u8"
    if idc.is_word(flags) and size == 2:
        return "u16"
    if idc.is_dword(flags) and size == 4:
        return "void*" if self.isPointer(self.getContent()) else "u32"

    # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
    # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
    firstLineSplitDisasm = list(filter(None, re.split('[ ,]', idc.GetDisasm(self.ea))))
    if not firstLineSplitDisasm:
        # No disassembly text to infer the element type from.
        return "INVALID"

    dataType = firstLineSplitDisasm[0]
    # Floor division keeps the element counts integral regardless of Python version.
    if dataType == "DCB":
        return "u8[%d]" % size
    if dataType == "DCW":
        return "u16[%d]" % (size // 2)
    if dataType == "DCD":
        if self.hasPointer():
            return "void*[%d]" % (size // 4)
        return "u32[%d]" % (size // 4)
    return "INVALID"
def isUnknown(self):
    """
    :return: result of idc.isUnknown for the flags of the item at self.ea
    """
    item_flags = idc.GetFlags(self.ea)
    return idc.isUnknown(item_flags)
def is_unknown(va):
    """Return the result of idc.isUnknown for the flags at address va."""
    va_flags = idc.GetFlags(va)
    return idc.isUnknown(va_flags)
# NOTE(review): the next two statements look like the tail of a loop whose header
# is not visible in this fragment — confirm against the full script.
print hex(cur_addr), idc.GetDisasm(cur_addr)
cur_addr = idc.NextHead(cur_addr)

# The F argument is the internal flags representation of an address: obtain it with
# idc.GetFlags(ea) first, then pass it to the idc.is* family of functions.
# Check whether IDA has marked it as code
idc.isCode(F)
# Check whether IDA has marked it as data
idc.isData(F)
# Check whether IDA has marked it as a tail
idc.isTail(F)
# Check whether IDA has marked it as unknown (neither data nor code)
idc.isUnknown(F)
# Check whether IDA has marked it as a head
idc.isHead(F)

# Example output:
#0x100001f77L mov rbx, rsi
#True
ea = here()
print hex(ea), idc.GetDisasm(ea)
print idc.isCode(idc.GetFlags(ea))

# idc.FindCode(ea, flag) finds the next address that is marked as code.
# It is helpful when looking for the end of a block of data.
# Example output:
#0x1000013c0L text "UTF-16LE", '{00000000-0000-0000-0000-000000000000}',0
#0x1000014f8L xor r11d, r11d
ea = here()
print hex(ea), idc.GetDisasm(ea)
def isUnknown(ea):
    '''Return idc.isUnknown applied to the flags of ea (True if ea is marked unknown)'''
    ea_flags = idc.GetFlags(ea)
    return idc.isUnknown(ea_flags)
def getContent(self, bin=False):
    """
    reads bytes at the EA of the data item and constructs its content representation based on its type

    Code items and single byte/word/dword items yield one combined integer; other data
    items are treated as arrays and yield the list produced by _combineBytes using the
    element width implied by the DCB/DCW/DCD directive on the first disassembly line;
    unknown items yield a single byte value. Struct items, and items matching none of
    the checks, yield -1.

    :param bin: if True, array of bytes is always passed back
    """
    flags = idc.GetFlags(self.ea)

    def read(count):
        # raw byte values of `count` bytes starting at this item
        return [ord(ch) for ch in idc.get_bytes(self.ea, count)]

    def discrete():
        # one value spanning the whole item, or its raw bytes when bin is set
        raw = read(self.getSize())
        if bin:
            return raw
        return self._combineBytes(raw, self.getSize())[0]

    if idc.isCode(flags):
        # an instruction is also data, its bytes are gathered and combined into one integer
        return discrete()

    if idc.isStruct(flags):
        # structs are not decoded
        return -1

    if idc.isData(flags):
        # normal case, build up a u8, u16, or u32
        primitive_widths = ((idc.is_byte, 1), (idc.is_word, 2), (idc.is_dword, 4))
        is_primitive = idc.is_data(flags) and any(
            is_kind(flags) and self.getSize() == width
            for is_kind, width in primitive_widths
        )
        if is_primitive:
            return discrete()

        # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
        # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
        tokens = list(filter(None, re.split('[ ,]', idc.GetDisasm(self.ea))))
        directive = tokens[0]

        # Grab all of the bytes in the array
        raw = read(idc.get_item_size(self.ea))
        if bin:
            return raw

        # if type unknown, just show it as an array of bytes
        element_width = {'DCB': 1, 'DCW': 2, 'DCD': 4}.get(directive, 1)
        return self._combineBytes(raw, element_width)

    if idc.isUnknown(flags):
        # unknown data elements are always 1 byte in size!
        value = ord(idc.get_bytes(self.ea, 1))
        return [value] if bin else value

    return -1
def InsIsUnknown(i):
    """Return the result of idc.isUnknown for the flags idaapi.getFlags reports for i."""
    ins_flags = idaapi.getFlags(i)
    return idc.isUnknown(ins_flags)
def isUnknown(ea):
    '''Look up the flags of ea and report whether they are marked unknown'''
    flags_at_ea = idc.GetFlags(ea)
    is_marked_unknown = idc.isUnknown(flags_at_ea)
    return is_marked_unknown
def is_unknow(self):
    """Return True if the current object's flags are marked unknown."""
    own_flags = self.flags
    return idc.isUnknown(own_flags)