def parse_hdr(self):
    '''
    Validate and annotate the pclntab header located at self.start_addr.

    Refer: function [go12Init()] in https://golang.org/src/debug/gosym/pclntab.go

    Fields read, as offsets from self.start_addr:
        +0  dword  magic number (must equal Pclntbl.MAGIC)
        +4  word   must be zero
        +6  byte   instruction size quantum, stored in self.min_lc (1, 2 or 4)
        +7  byte   pointer size, stored in self.ptr_sz (4 or 8)

    A wrong magic number is reported and IDA is asked to exit with code 1;
    every other invalid field raises Exception.
    '''
    base = self.start_addr

    # Magic number
    magic = idc.Dword(base) & 0xFFFFFFFF
    if magic != Pclntbl.MAGIC:
        print("%s %s" % (magic, Pclntbl.MAGIC))
        common._error("Invalid pclntbl header magic number!")
        idc.Exit(1)
    idc.MakeDword(base)
    idc.MakeComm(base, "Magic Number")
    idc.MakeNameEx(base, "runtime_symtab", flags=idaapi.SN_FORCE)
    idaapi.autoWait()

    # The two bytes following the magic number must be zero
    zero_word = idc.Word(base + 4) & 0xFFFF
    if zero_word != 0:
        raise Exception("Invalid pclntbl header")
    idc.MakeWord(base + 4)

    # Instruction size quantum
    self.min_lc = idc.Byte(base + 6) & 0xFF
    if self.min_lc not in (1, 2, 4):
        raise Exception("Invalid pclntbl minimum LC!")
    idc.MakeComm(base + 6, "instruction size quantum")
    idaapi.autoWait()

    # Pointer size
    self.ptr_sz = idc.Byte(base + 7) & 0xFF
    if self.ptr_sz not in (4, 8):
        raise Exception("Invalid pclntbl pointer size!")
    idc.MakeComm(base + 7, "ptr size")
    idaapi.autoWait()
def isGlobalAsciiString(self, ea):
    r"""Check if the given address is the beginning of a valid global string.

    Args:
        ea (int): effective address to be checked

    Notes
    -----
        A candidate is accepted only when all of these hold:
        1. The address is aligned to the global alignment.
        2. Every char belongs to the valid alphabet.
        3. The string is '\0' terminated.
        4. If a global pad byte is configured, the bytes between the
           terminator and the next aligned address all equal it.
        5. The string is at least as long as the minimal global length.

    Return Value:
        True iff the given address could be the start of a global string
    """
    # alignment comes first, it is the cheapest test
    if ea % self._global_alignment != 0:
        return False

    candidate = self.getAsciiString(ea)
    if candidate is None:
        return False

    # every single char must come from our alphabet
    if not all(char in self._valid_alphabet for char in candidate):
        return False

    length = len(candidate)

    # the byte right after the content must be the terminator
    if idc.Byte(ea + length) != ord('\0'):
        return False

    # the gap up to the next aligned address must hold only pad bytes
    if self._global_pad is not None:
        pad_start = ea + length + 1
        for delta in xrange(padSize(pad_start, self._global_alignment)):
            if idc.Byte(pad_start + delta) != self._global_pad:
                return False

    # finally, the lower bound on the length
    return length >= self._min_global_length
def data(self):
    """Fold every byte of every item of the function at self.offset into a hash.

    The accumulator starts at self.keleven and is updated through
    self._cycle, one byte at a time, in address order within each item.

    Return Value:
        the final accumulator value
    """
    digest = self.keleven
    for head in idautils.FuncItems(self.offset):
        # first byte of the item
        digest = self._cycle(digest, idc.Byte(head))
        # then all of the remaining bytes of the same item
        cursor = head + 1
        item_end = head + idc.ItemSize(head)
        while cursor < item_end:
            digest = self._cycle(digest, idc.Byte(cursor))
            cursor += 1
    return digest
def data(self):
    """Hash the function at self.offset, ignoring operand bytes that hold offsets.

    The accumulator starts at self.keleven and is updated through
    self._cycle. The first byte of every item is always hashed; the
    remaining bytes are hashed only when the item has no outgoing code
    reference (ordinary flow excluded) and no outgoing data reference.

    Return Value:
        the final accumulator value
    """
    h = self.keleven
    for ea in idautils.FuncItems(self.offset):
        h = self._cycle(h, idc.Byte(ea))

        # skip additional bytes of any instruction that contains an offset in it.
        # The reference helpers may hand back a generator, and a generator
        # object is truthy even when it yields nothing, so test for an actual
        # first element instead of testing the returned object itself.
        has_code_ref = any(True for _ in idautils.CodeRefsFrom(ea, False))
        if has_code_ref or any(True for _ in idautils.DataRefsFrom(ea)):
            continue

        for i in range(ea + 1, ea + idc.ItemSize(ea)):
            h = self._cycle(h, idc.Byte(i))
    return h
def data(cls, offset):
    """Fold every byte of every item of the function at `offset` into a hash.

    Args:
        offset (int): address of the function to be hashed

    Return Value:
        None when cls.inst_count(offset) is below 3, otherwise the
        accumulator obtained by starting at cls.keleven and applying
        cls._cycle to each byte of each item.
    """
    # functions that are too short are not hashed at all
    if cls.inst_count(offset) < 3:
        return None

    digest = cls.keleven
    for item_ea in idautils.FuncItems(offset):
        # leading byte of the item
        digest = cls._cycle(digest, idc.Byte(item_ea))
        # followed by the rest of its bytes
        item_end = item_ea + idc.ItemSize(item_ea)
        for byte_ea in range(item_ea + 1, item_end):
            digest = cls._cycle(digest, idc.Byte(byte_ea))
    return digest
def parse(self):
    """Parse the map-specific attributes that follow the common rtype header.

    Starting at self.addr + self.rtype.self_size the code reads three
    pointer-sized slots (key type, element type, bucket type). It then
    reads, at slot 3 when self.go_subver < 14, or at slot 4 after a
    hasher function pointer otherwise: key size (1 byte), value size
    (1 byte), bucket size (2 bytes) and flags (4 bytes).

    Referenced types are taken from the type parser's cache when already
    parsed, and parsed on demand otherwise. Every field gets an IDA comment.
    """
    _debug("Map Type @ 0x%x" % self.addr)
    attrs = self.addr + self.rtype.self_size

    def resolve(type_addr):
        # reuse an already parsed type, parse it now otherwise
        if self.type_parser.has_been_parsed(type_addr):
            return self.type_parser.parsed_types[type_addr]
        return self.type_parser.parse_type(type_addr=type_addr)

    self.key_type = resolve(read_mem(attrs))
    self.elem_type = resolve(read_mem(attrs + ADDR_SZ))
    self.buck_type = resolve(read_mem(attrs + 2*ADDR_SZ))

    # an extra pointer-sized slot (the hasher) precedes the size fields
    # unless go_subver is below 14
    has_hasher = not (self.go_subver < 14)
    hasher_slot = attrs + 3*ADDR_SZ
    if has_hasher:
        self.hasher_func_addr = read_mem(hasher_slot)
        sizes = attrs + 4*ADDR_SZ
    else:
        sizes = hasher_slot

    self.key_size = idc.Byte(sizes) & 0xFF
    self.val_size = idc.Byte(sizes + 1) & 0xFF
    self.buck_size = read_mem(sizes + 2, forced_addr_sz=2) & 0xFFFF
    self.flags = read_mem(sizes + 4, forced_addr_sz=4) & 0xFFFFFFFF

    self.name = "map [%s]%s" % (self.key_type.name, self.elem_type.name)

    # annotate the database
    idc.MakeComm(attrs, "Key type: %s" % self.key_type.name)
    idc.MakeComm(attrs + ADDR_SZ, "Elem type: %s " % self.elem_type.name)
    idc.MakeComm(attrs + 2*ADDR_SZ, "Bucket type: %s" % self.buck_type.name)
    if has_hasher:
        idc.MakeComm(hasher_slot, "hash function for hashing keys (ptr to key, seed) -> hash")
    idc.MakeComm(sizes, "Key size: 0x%x" % self.key_size)
    idc.MakeComm(sizes + 1, "Value size: 0x%x" % self.val_size)
    idc.MakeComm(sizes + 2, "Bucket size: 0x%x" % self.buck_size)
    idc.MakeComm(sizes + 4, "Flags: 0x%x" % self.flags)
    idaapi.autoWait()

    _debug("Map Key type: %s" % self.key_type.name)
    _debug("Map Elem type: %s " % self.elem_type.name)
def get_string(addr):
    """
    Read the bytes starting at `addr` up to (not including) the first zero
    byte and return them as a string.

    idc.GetString may be return wrong length. For example:
    00096d10f7872706af8155d40ddc4dab address 0x0001A7D4 string length 8,
    but idc.GetString returns 3.
    """
    chars = []
    while True:
        # read every byte exactly once
        value = idc.Byte(addr)
        if value == 0:
            break
        chars.append(chr(value))
        addr += 1
    # a single join instead of repeated string concatenation
    return "".join(chars)
def data(cls, offset):
    """Hash the function at `offset`, ignoring operand bytes that hold offsets.

    Args:
        offset (int): address of the function to be hashed

    Return Value:
        None when cls.inst_count(offset) is below 3. Otherwise the
        accumulator obtained by starting at cls.keleven and applying
        cls._cycle to the first byte of every item, plus the remaining
        bytes of those items that have no outgoing code reference
        (ordinary flow excluded) and no outgoing data reference.
    """
    if cls.inst_count(offset) < 3:
        return None

    h = cls.keleven
    for ea in idautils.FuncItems(offset):
        h = cls._cycle(h, idc.Byte(ea))

        # skip additional bytes of any instruction that contains an offset in it.
        # The reference helpers may hand back a generator, and a generator
        # object is truthy even when it yields nothing, so test for an actual
        # first element instead of testing the returned object itself.
        has_code_ref = any(True for _ in idautils.CodeRefsFrom(ea, False))
        if has_code_ref or any(True for _ in idautils.DataRefsFrom(ea)):
            continue

        for i in range(ea + 1, ea + idc.ItemSize(ea)):
            h = cls._cycle(h, idc.Byte(i))
    return h
def isLocalAsciiString(self, ea, check_refs=True):
    r"""Check if the given address is the beginning of a valid local string.

    Args:
        ea (int): effective address to be checked
        check_refs (bool, optional): when True, the address must also be
            recognised as a data constant by the locals identifier

    Notes
    -----
        0. If selected, the string must have a data reference to it.
        1. Every char belongs to the valid alphabet.
        2. The string is '\0' terminated.
        3. If a local pad byte is configured, the bytes between the
           terminator and the next aligned address all equal it.
        4. The length must follow one of the following rules:
            a) Larger than the local alignment.
            b) At least 2 bytes, and the first is '%' (short format strings).
            c) Exactly one byte, and it is a punctuation char.
            d) At least 3 bytes.

    Return Value:
        True iff the given address could be the start of a local string
    """
    # the line should be referenced (as data)
    if check_refs:
        if not self._analyzer.locals_identifier.isDataConstant(ea):
            return False

    text = self.getAsciiString(ea)
    if text is None:
        return False

    # every single char must come from our alphabet
    if not all(char in self._valid_alphabet for char in text):
        return False

    size = len(text)

    # the byte right after the content must be the terminator
    if idc.Byte(ea + size) != ord('\0'):
        return False

    # the gap up to the next aligned address must hold only pad bytes
    if self._local_pad is not None:
        pad_start = ea + size + 1
        for delta in xrange(padSize(pad_start, self._local_alignment)):
            if idc.Byte(pad_start + delta) != self._local_pad:
                return False

    # filtering heuristic, from the most permissive rule to the default one
    if size > self._local_alignment:
        return True
    if size > 1 and text[0] == '%':
        return True
    if size == 1 and text[0] in string.punctuation:
        return True
    return size > 2
def __PltResolver(jmprel, strtab, symtab, pltgot):
    """Name GOT slots, PLT stubs and .plt.sec stubs after their imported symbol.

    Walks the relocation table at `jmprel` as 8-byte entries (a 32-bit
    offset followed by a 32-bit info word), stopping at the first entry
    whose offset is above 0x7fffffff. For every entry of relocation
    type 7 (R_386_JMP_SLOT in the i386 ABI) the symbol name is looked up
    through `symtab` (0x10-byte entries) and `strtab`, then:
      * the GOT slot is named '<name>_ptr',
      * the address stored in the slot is named 'j_<name>',
      * every function referencing the slot is named '_<name>',
      * every .plt.sec function whose disassembly mentions the slot's
        offset from `pltgot` is named '_<name>'.

    Args:
        jmprel (int): address of the PLT relocation table
        strtab (int): address of the dynamic string table
        symtab (int): address of the dynamic symbol table
        pltgot (int): address the GOT offsets are measured from
    """
    seg_sec = idc.SegByName('.plt.sec')
    sec_start = idc.SegByBase(seg_sec)
    # bail out before asking for the end of a segment that does not exist
    if sec_start == idaapi.BADADDR:
        print("[-] can't find .plt.sec segment")
        return
    sec_end = idc.SegEnd(sec_start)

    idx = 0
    while True:
        entry = jmprel + 0x8 * idx
        r_off = idc.Dword(entry)
        r_info = idc.Dword(entry + 0x4)
        if r_off > 0x7fffffff:
            return

        # r_info packs the relocation type in its low byte and the symbol
        # index in the upper 24 bits; a single byte is not enough for the
        # index once the symbol table holds more than 255 entries.
        r_type = r_info & 0xff
        r_sym = r_info >> 8

        if r_type == 7:
            st_name = idc.Dword(symtab + r_sym * 0x10)
            name = idc.GetString(strtab + st_name)
            if not name:
                # nothing usable to build the new names from
                idx += 1
                continue

            # rename got
            idc.set_name(r_off, name + '_ptr')
            plt_func = idc.Dword(r_off)
            # rename plt
            idc.set_name(plt_func, 'j_' + name)
            SetFuncFlags(plt_func)

            # rename plt.sec (through the data references to the slot)
            for addr in idautils.DataRefsTo(r_off):
                plt_sec_func = idaapi.get_func(addr)
                if plt_sec_func:
                    plt_sec_func_addr = plt_sec_func.startEA
                    idc.set_name(plt_sec_func_addr, '_' + name)
                    SetFuncFlags(plt_sec_func_addr)
                else:
                    print("[!] idaapi.get_func({}) failed".format(hex(addr)))

            # rename plt.sec (through the displacement shown in the disassembly)
            got_off = r_off - pltgot
            target = '+{}h'.format(('%x' % got_off).rjust(2, '0'))
            for func_ea in idautils.Functions(sec_start, sec_end):
                func = idaapi.get_func(func_ea)
                cur = func.startEA
                end = func.endEA
                find = False
                # endEA is exclusive, so the scan stops before it
                while cur < end:
                    code = idc.GetDisasm(cur).lower().replace(' ', '')
                    if target in code:
                        find = True
                        break
                    cur = idc.NextHead(cur, end)
                if find:
                    idc.set_name(func_ea, '_' + name)
                    SetFuncFlags(func_ea)
        idx += 1
def is_short_jmp(ea):
    """Classify the jump opcode found at `ea`.

    Args:
        ea (int): address of the first opcode byte

    Returns:
        (is_short, opcode_length):
            (True, 1)  for the one-byte opcodes 0xEB, 0x74 and 0x75
            (False, 1) for the one-byte opcode 0xE9
            (False, 2) for the two-byte opcodes 0x0F 0x84 and 0x0F 0x85

    Raises:
        AssertionError: for any other byte sequence (a message is printed
            first).
    """
    b = idc.Byte(ea)
    if b in (0xEB, 0x74, 0x75):  # short jmp
        return (True, 1)
    if b == 0xE9:  # long jmp
        return (False, 1)
    if b == 0x0F:
        # the decision is made on the SECOND opcode byte, not the 0x0F prefix
        b2 = idc.Byte(ea + 1)
        if b2 in (0x84, 0x85):
            return (False, 2)

    # unexpected, report and fail (raised explicitly so that running
    # python with -O does not silently turn this into `return None`)
    message = "unexpected byte @ 0x%x" % ea
    print(message)
    raise AssertionError(message)
def load_slice(self, state, start, end):
    """
    Return the memory objects overlapping with the provided slice.

    Slots that are still empty are filled on demand with the byte read
    through idc.Byte, but only when this page carries a `from_ida_dbg`
    attribute. Consecutive addresses backed by the very same memory
    object are reported once, at the first address.

    :param state: not used by this implementation
    :param start: the start address
    :param end:   the end address (non-inclusive)
    :returns:     tuples of (starting_addr, memory_object)
    """
    page_start = self._page_addr
    page_end = page_start + self._page_size
    found = []

    if start > page_end or end < page_start:
        l.warning("Calling load_slice on the wrong page.")
        return found

    for addr in range(max(start, page_start), min(end, page_end)):
        slot = addr - page_start
        obj = self._storage[slot]

        if obj is None and hasattr(self, "from_ida_dbg"):
            # NOTE: change here to support other debuggers
            obj = SimMemoryObject(claripy.BVV(idc.Byte(addr), 8), addr)
            self._storage[slot] = obj

        if obj is None:
            continue
        # the same object as the previous entry means the same run
        if found and found[-1][1] is obj:
            continue
        found.append((addr, obj))

    return found
def force_create_function(loc):
    """
    Similar to create_function above, but a little more hackish (maybe).
    Makes a lot of assumptions about there being defined code, i.e. not
    obfuscated code. However, won't create a function that does not include
    the desired location, which will need to be fixed at a later date.

    Nothing is created when `loc` already belongs to a function, or when it
    sits on an align directive, a nop instruction or a data byte of 0x90.

    :param loc: Location a function is needed at
    :return: True if function is created, False otherwise
    """
    # Sanity check: the location must not be inside a function already.
    if idaapi.get_func(loc):
        append_debug("There's already a function here!")
        return False

    # Sanity check: padding is not a function.
    on_padding = (
        idc.isAlign(idc.GetFlags(loc))
        or idc.GetMnem(loc) == 'nop'
        or (idaapi.isData(idc.GetFlags(loc)) and idc.Byte(loc) == 0x90)
    )
    if on_padding:
        append_debug("Can't make a function out of aligns and/or nops!")
        return False

    start = _force_find_start(loc)
    end = _find_force_end(loc)

    if not idc.MakeFunction(start, end):
        append_debug('Failed to create a function 0x%X - 0x%X.' % (start, end))
        return False

    append_debug('Created a function 0x%X - 0x%X.' % (start, end))
    return True
def readValue(self):
    """Resolve the runtime value of this operand and cache it in self.value.

    A value that has already been resolved is returned as is. Otherwise
    the operand type reported by self.parser decides how the value is
    obtained:
      * ValueNOfRegisterPlusOffset: N bits read from memory at
        (register value + self.opValue),
      * Register64/32/16/8: the register value itself,
      * ImmediateUnkown: self.opValue.

    Returns:
        the resolved value

    Raises:
        Exception: when the operand type is none of the above.
    """
    if self.value is not None:
        return self.value

    operandType = self.parser.getOperandType()
    regName = self.parser.getRegName()
    regValue = idc.GetRegValue(regName) if regName is not None else None

    # memory operands: one reader per access width
    memory_readers = (
        (OperandType.Value64OfRegisterPlusOffset, idc.Qword),
        (OperandType.Value32OfRegisterPlusOffset, idc.Dword),
        (OperandType.Value16OfRegisterPlusOffset, idc.Word),
        (OperandType.Value8OfRegisterPlusOffset, idc.Byte),
    )
    for memory_type, reader in memory_readers:
        if operandType == memory_type:
            self.value = reader(regValue + self.opValue)
            return self.value

    # register operands are all handled the same way, whatever their width
    register_types = (
        OperandType.Register64,
        OperandType.Register32,
        OperandType.Register16,
        OperandType.Register8,
    )
    if operandType in register_types:
        self.value = regValue
    elif operandType == OperandType.ImmediateUnkown:
        self.value = self.opValue
    else:
        raise Exception("Unknown operand type")

    return self.value
def patch(self, patch, fill_nop=True):
    """Change the content of object by `patch`

    Bytes are written one by one from self.addr; a byte that already has
    the requested value is left alone.

    Args:
        patch: sequence of byte values (ints, or one-char strings which
            are converted with ord)
        fill_nop (bool): when True and the patch is shorter than the
            object, the remaining bytes are overwritten with 0x90. When
            False the remaining bytes are left untouched.

    Raises:
        ValueError: when the patch is longer than the object.
    """
    print("PATCH ASKED at <{0}| size {1}> with {2}".format(
        self.addr, self.size, patch))
    nop = 0x90  # <- need to adapt to other platform

    if self.size < len(patch):
        raise ValueError("Patch is too big for {0}".format(self))

    new_bytes = list(patch)
    if fill_nop:
        # pad up to the size of the object
        new_bytes += [nop] * (self.size - len(new_bytes))
    # without padding, zip() below stops at the end of the patch and the
    # trailing bytes of the object keep their current value

    for addr, byte in zip(range(self.addr, self.addr + self.size), new_bytes):
        if isinstance(byte, str):
            byte = ord(byte)
        if idc.Byte(addr) == byte:
            print("NOPATCH BYTE : SAME VALUE")
            continue
        if not idc.PatchByte(addr, byte):
            print("PATCH addr {0} with byte {1} failed".format(
                hex(addr), hex(byte)))
def stval(self, addr, ofs, sz=8):
    """Read a value relative to `addr` from the debugged process.

    Args:
        addr: a memory address, or a number below 20 that selects an
            argument: 0-3 are taken from rcx, rdx, r8, r9 and the others
            from the qword at rsp + addr * 8 + 8 (presumably the Microsoft
            x64 calling convention - confirm against the callers)
        ofs: either a numeric offset added to `addr`, or a dotted path
            '<struct>.<field>[.<field>...]' resolved through struct.Struct
        sz: number of bytes to read for a numeric `ofs` (8, 4 or 2; any
            other value reads a single byte)

    Returns:
        str(...) of the resolved field for a dotted path, otherwise the
        integer read from memory.
    """
    if addr < 20:
        # argument index rather than an address
        if addr < 4:
            addr = idc.GetRegValue(("rcx", "rdx", "r8", "r9")[addr])
        else:
            stack_top = idc.GetRegValue('rsp')
            addr = idc.Qword(stack_top + addr * 8 + 8)

    if isinstance(ofs, basestring):
        path = ofs.split('.')
        layout = struct.Struct(path[0])
        value = layout.readInst(addr)
        # descend through every component except the last one
        while len(path) > 2:
            value = layout.subInst(value, path[1])
            path = path[1:]
        if len(path) > 1:
            value = value[path[1]]
        return str(value)

    # plain numeric offset: choose the reader matching the requested size
    reader = {8: idc.Qword, 4: idc.Dword, 2: idc.Word}.get(sz, idc.Byte)
    return reader(addr + ofs)
def trim_func(ea, GetHead):
    """
    Description:
        Walks with GetHead for as long as the current location is filler:
        a nop instruction, a data byte of 0x90 (nop opcode), an align
        directive, or a non-code byte of 0xCC.

    Input:
        ea - The location to adjust for nops and Aligns. EA must be a head.
        GetHead - either PrevHead or NextHead

    Output:
        The corrected EA.
    """
    def is_filler(addr):
        # tests are kept in this order, each one is evaluated lazily
        if idc.GetMnem(addr) == 'nop':
            return True
        if idaapi.isData(idc.GetFlags(addr)) and idc.Byte(addr) == 0x90:
            return True
        if idc.isAlign(idc.GetFlags(addr)):
            return True
        return not idc.isCode(idc.GetFlags(addr)) and idc.Byte(addr) == 0xCC

    while is_filler(ea):
        ea = GetHead(ea)
    return ea
def find_function_epilogue_bxlr(self, makecode=False):
    '''
    Find opcode bytes corresponding to BX LR.
    This is a common way to return from a function call.
    Using the IDA API, convert these opcodes to code. This kicks off IDA analysis.

    The whole address range [MinEA, MaxEA) is scanned in steps of two
    bytes. A location matches when it is not already code and its two
    bytes are 0x70 0x47. Matches are printed when self.printflag is set,
    and converted to code when `makecode` is True.
    '''
    cursor = idc.MinEA()
    limit = idc.MaxEA()
    length = 2  # this code isn't tolerant to values other than 2 right now
    fmt_string = "Possible BX LR 0x%08x == " + "%02x " * length

    while cursor < limit:
        opcode = [idc.Byte(cursor + index) for index in range(length)]
        already_code = ida_bytes.isCode(ida_bytes.getFlags(cursor))
        if not already_code and opcode[0] == 0x70 and opcode[1] == 0x47:
            if self.printflag:
                print(fmt_string % (cursor, opcode[0], opcode[1]))
            if makecode:
                idc.MakeCode(cursor)
        cursor += length
def getName(self, offset):
    """Return the name string referenced by the "type" structure at `offset`.

    The "string" member of the structure is turned into an address with
    self.getOffset. The byte at that address + 2 is used as the length,
    and the text itself starts at that address + 3.
    """
    type_sid = idc.GetStrucIdByName("type")
    name_ref = self.getDword(type_sid, offset, "string")
    name_record = self.getOffset(name_ref)
    text_ea = name_record + 3
    text_len = idc.Byte(text_ea - 1)
    return self.get_str(text_ea, text_len)
def getAllMemoryFromIda(self):
    """Return, as one string, every byte from the base address up to the end
    of the segment that holds the base address."""
    first = self.getBaseAddress()
    last = idc.SegEnd(first)
    return "".join(chr(idc.Byte(ea)) for ea in lrange(first, last))
def guidAtAddr(self, addr):
    """Read the 16 bytes at `addr` as a GUID.

    The fields are read as one dword, two words and eight single bytes,
    then handed to GuidHelper.

    Returns:
        what GuidHelper.findGuid returns for the GUID when that is truthy,
        otherwise the GUID built by GuidHelper.guidOfVals.
    """
    fields = [idc.Dword(addr), idc.Word(addr + 4), idc.Word(addr + 6)]
    tail_ea = addr + 8
    fields.append([idc.Byte(tail_ea + index) for index in range(8)])

    helper = GuidHelper()
    guid = helper.guidOfVals(fields)
    known = helper.findGuid(guid)
    return known or guid
def get_guid(address):
    """Read the 16 bytes at `address` as a GUID.

    Returns:
        a flat list of 11 integers: one dword, two words, then the eight
        remaining bytes one by one.
    """
    guid = [
        idc.Dword(address),
        idc.Word(address + 4),
        idc.Word(address + 6),
    ]
    guid.extend(idc.Byte(ea) for ea in range(address + 8, address + 16))
    return guid
def get_header_idb():
    """
    get file header from idb

    Returns the bytes of the segment at address 0 when that segment is
    named "HEADER", and an empty bytearray otherwise.
    """
    if idc.SegName(0) != "HEADER":
        return bytearray(b"")
    header_end = idc.SegEnd(0)
    return bytearray(idc.Byte(ea) for ea in range(0, header_end))
def getString(self, ea):
    """Return the bytes starting at `ea`, up to (not including) the first
    zero byte, as a string."""
    chars = []
    value = idc.Byte(ea)
    while value != 0:
        chars.append(chr(value))
        ea += 1
        value = idc.Byte(ea)
    return ''.join(chars)
def decide(self):
    """Sum up the information from all of the seen records, and decide what is the alignment pattern.

    Each record is indexed as (ea, size). The alignment is estimated from
    the greatest common divisors between randomly picked record addresses
    and all of the (remaining) addresses; the padding byte is then looked
    for in the gap that follows each aligned record.

    Return Value:
        (alignment, pad byte) if found a full pattern, (alignment, None) if no padding, and None for errors.
        NOTE: when the first record has no size (None), the bare alignment
        value is returned instead of a tuple.
    """
    # Sanity check
    if len(self._records) < 2:
        return None
    # Now check for a basic alignment rule
    seen_eas = map(lambda x: x[0], self._records)
    # Deterministic results per binary, but still random
    # (the seed is derived from the MD5 of the input file)
    random.seed(int(idautils.GetInputFileMD5(), 16) & 0xFFFFFFFF)
    while True:
        # Check against two random candidates, and always make sure the representative isn't rare
        measure_candidate = seen_eas[random.randint(0, len(seen_eas) - 1)]
        measure_candidate_alt = seen_eas[random.randint(0, len(seen_eas) - 1)]
        # gcd of each candidate against every address that is still considered
        gcds = map(lambda x: gcd(measure_candidate, x), seen_eas)
        gcds_alt = map(lambda x: gcd(measure_candidate_alt, x), seen_eas)
        # the smallest gcd is the alignment suggested by that candidate
        alignment = min(gcds)
        alignment_alt = min(gcds_alt)
        # the two candidates disagree: keep the smaller alignment (together
        # with the candidate that produced it) and go for another round
        if alignment > alignment_alt:
            alignment = alignment_alt
            measure_candidate = measure_candidate_alt
            try_again = True
        elif alignment != alignment_alt:
            try_again = True
        else:
            try_again = False
        # Try to check if removing outliers will improve the alignment
        # (also done when at most 1% of the gcds equal the chosen alignment)
        if try_again or gcds.count(alignment) <= len(gcds) * 0.01:
            # pick the next element, and try to improve the result
            # (drops every address whose gcd with the kept candidate equals the alignment)
            seen_eas = filter(lambda x: gcd(measure_candidate, x) != alignment, seen_eas)
        # we can't improve the results
        else:
            break
    # We shouldn't look for padding bytes (we have no size)
    if self._records[0][1] is None:
        return alignment
    # Alignment is 1, there is no padding to be found
    if alignment == 1:
        return (alignment, None)
    # Check if there is a common padding byte (skip the outliers)
    pad_byte = None
    for ea, size in filter(lambda x: x[0] % alignment == 0, self._records):
        # go over the bytes between the end of the record and the next aligned address
        for offset in xrange((alignment - ((ea + size) % alignment)) % alignment):
            test_byte = idc.Byte(ea + size + offset)
            # the first gap byte that is seen becomes the reference pad byte
            if pad_byte is None:
                pad_byte = test_byte
            # Failed to find a single padding byte...
            elif pad_byte != test_byte:
                return (alignment, None)
    # Found a padding byte :)
    if pad_byte is not None:
        return (alignment, pad_byte)
    # There were no gaps to be padded, no padding is needed
    else:
        return (alignment, None)
def read_bytes_slowly(start, end):
    """Return the bytes of [start, end) as a string, one address at a time.

    An address whose flags report no value contributes a "\\x00" char.
    """
    def char_at(ea):
        if idc.hasValue(idc.GetFlags(ea)):
            return chr(idc.Byte(ea))
        return "\x00"

    return "".join(char_at(ea) for ea in xrange(start, end))
def getDbgMem(ea, size):
    """Read `size` bytes starting at `ea` and return them as a byte string.

    Whole groups of 8 bytes are fetched with idc.get_qword and packed as
    little-endian; the remaining (size & 7) bytes are fetched one by one.
    """
    whole = size & (~7)
    parts = [
        struct.pack("<Q", idc.get_qword(ea + offset))
        for offset in range(0, whole, 8)
    ]
    tail_ea = ea + whole
    parts.extend(
        struct.pack("<B", idc.Byte(tail_ea + index))
        for index in range(size & 7)
    )
    return b''.join(parts)
def processStructField(self, addr, index):
    """Handle the "structField" record located at addr + index.

    The location referenced by the "Name" member is renamed after the
    field name: the byte at that location + 2 is used as the length, and
    the text starts at that location + 3. The "typ" member is then passed
    on to self.handle_offset.
    """
    field_ea = addr + index
    field_sid = idc.GetStrucIdByName("structField")

    name_ptr = self.getPtr(field_sid, field_ea, "Name")
    name_len = idc.Byte(name_ptr + 2)
    Utils.rename(name_ptr, self.get_str(name_ptr + 3, name_len))

    self.handle_offset(self.getPtr(field_sid, field_ea, "typ"))
def extractFunctionTypeSample(self, ea):
    """Extract features for a "code type" sample.

    The features are the bytes found at `ea` plus each of the offsets in
    self._classifier_type_offsets, in that order.

    Args:
        ea (int): effective address to be sampled

    Return Value:
        feature set (list of byte values)
    """
    return [idc.Byte(ea + delta) for delta in self._classifier_type_offsets]
def GetStr(ea):
    """Return the bytes starting at `ea`, up to (not including) the first
    zero byte, as a string."""
    pieces = []
    cursor = ea
    while True:
        value = idc.Byte(cursor)
        if value == 0:
            return "".join(pieces)
        pieces.append(chr(value))
        cursor += 1