def _getPoolDisasm(self):
    # type: () -> str
    """
    Disassembles a pool load in a linker compatible form.

    If the instruction at self.ea is not a pool LDR, an empty string is returned.

    :return: 'LDR Rd, <pool name>[+offset] // =<value> ' for a pool load, otherwise ''
    :raise DataException: if the pool item has no name, or if the loaded content is not an integer
    """
    if not ('LDR' in idc.GetMnem(self.ea) and Instruction.isPoolLDR(self.ea)):
        return ''

    insn = Instruction.Insn(self.ea)
    inst = 'LDR'
    reg = 'R%d' % insn.ops[0].reg

    # retrieve the pool address from the instruction
    pool_ea = insn.ops[1].addr
    poolData = Data(pool_ea)
    # distance of the referenced address from the start of the item that contains it
    offset = pool_ea - poolData.ea

    # every pool reference must have a name
    poolName = poolData.getName()
    if not poolName:
        raise DataException(
            "%07X: pool_ea %07X does not have a name" % (self.ea, pool_ea))

    content = poolData.getContent()
    # the pool might point to an array. The offset determines which element is loaded
    if isinstance(content, list):
        # floor division keeps the index an integer regardless of the division semantics in effect
        elemSize = poolData.getSize() // len(content)
        content = content[offset // elemSize]
    if not isinstance(content, (int, long)):
        raise DataException(
            "%07X: attempt to load non-int to register" % pool_ea)

    # write the actual pool value being loaded for readability
    contentData = Data(content)
    if contentData.isPointer(contentData.ea):
        # figure out the unsync between the xref of the pool and the content data: that's the index.
        # depending on the data format of the value in the db, it may have no xrefs...
        xrefs = poolData.getXRefsFrom()[1]
        delta = xrefs[0] - contentData.ea if xrefs else 0
        # '%+d' always prints the sign; a zero delta yields no suffix at all
        index = '%+d' % delta if delta else ''
        cmt = "=%s%s" % (contentData.getName(), index)
    else:
        cmt = "=0x%X" % content

    # in case pool_ea is within an array, then it could be offset
    offsetSuffix = '+%d' % offset if offset else ''

    output = "%s %s, %s%s // %s " % (inst, reg, poolName, offsetSuffix, cmt)

    # TODO: old output style for debugging purposes
    # if self.getSize() == 4:
    #     shift = 8
    # elif (pool_ea - self.ea - 4) % 4 != 0:
    #     # to achieve word alignment, we round down to the last word aligned value
    #     shift = 2
    # else:
    #     # normal case, PC is 2 instructions ahead
    #     shift = 4
    #
    # output = "%s %s, [PC, #0x%07X-0x%07X-%d] // %s" % (inst, reg,
    #                                                    pool_ea, self.ea, shift, cmt)
    return output
def _convertCode(self, disasm):
    """
    Rewrites the disassembly of a code item so that it is compatible with arm-none-eabi-gcc.

    The item inspected is the one at self.ea. If it is not code, disasm is returned untouched.

    :param disasm: (str) disasm to transform
    :return: (str) converted disasm
    """
    if not idc.isCode(idc.GetFlags(self.ea)):
        # Default case, no modifications
        return disasm

    # some instructions take no operands, like NOP
    mnem = idc.GetMnem(self.ea)
    # everything after the mnemonic, taken before the mnemonic is possibly shortened below
    operands = disasm[len(mnem):].lstrip()

    # if the instruction is THUMB, it cannot have an 'S' in it... (except for branches)
    # the BIC instruction is not a branch, account for that
    thumb = self.getSize() == 2
    branch = mnem[0] == 'B' and 'BIC' not in mnem
    endsWithS = mnem[-1] == 'S'
    if thumb and endsWithS and not branch:
        mnem = mnem[:-1]

    # adjust instruction spacing TODO: tabs or pad`s for instruction?
    output = mnem + ' ' + operands

    # convert immediate reference instructions
    output = self._convertImmediateReferences(output)

    # if the instruction is a pool instruction, the format should be changed
    output = self._getPoolDisasm() or output

    def operandsAreLowRegs():
        # operand 1 is a register, and operands 0 and 1 both have register numbers <= 7
        decoded = Instruction.Insn(self.ea)
        return (decoded.ops[1].type == ida_ua.o_reg
                and decoded.ops[0].reg <= 7
                and decoded.ops[1].reg <= 7)

    # convert MOV Rlow, Rlow to a meaner form LSL Rx, Ry, #0. Thanks anyway, IDA.
    if mnem == 'MOV' and output.count(',') == 1 and operandsAreLowRegs():
        output = output.replace(mnem, 'LSL', 1) + ', #0'

    # convert ADD Rlow, Rlow, #0 to a nicer form MOV Rlow, Rlow. This is what arm-none-eabi-as does.
    if mnem == 'ADD' and output.endswith(', #0') and output.count(',') > 1 and operandsAreLowRegs():
        # 'ADD' and 'MOV' have the same length, so the comma position found in the
        # unmodified text still marks where the trailing ', #0' begins
        lastComma = output.rindex(',')
        output = output.replace(mnem, 'MOV', 1)[:lastComma]

    # an ADR instruction is replaced with a short (DCW); the disassembly is kept as a comment
    # NOTE(review): the mnemonic is matched against 'ADR ' with a trailing space -- confirm
    # that GetMnem can return such a string, otherwise this branch never runs
    if "ADR " in mnem:
        output = self._convertData("DCW 0x%X // %s" % (self.getContent(), output))

    # parse comment commands -- if it's a redundant instruction, it should have the <mkdata> tag in it
    # if "<mkdata>" in self.getComment():
    #     output = "DCW 0x%X // %s" % (self.getContent(), output)
    #     output = self._convertData(output)
    return output