def get_swval(start_ea):
    """ea_t -> (str, op_ty, int)

    Scan backwards from the switch instruction at start_ea for the
    instruction that tests the switch value.

    Returns a tuple of the mnemonic, operand 1 of the matching instruction
    (as produced by ida.get_op), and the literal 1.  Only the `sltiu` idiom
    is recognised.

    Raises SwitchTroubleError when a non-switch instruction that has code
    references (ida.code_refs_from(ea, 0)) is met first, or when no match is
    found among the 11 instructions preceding start_ea.
    """
    # XXX imperative
    # called only to confirm this insn has switch info; the result is unused
    ida.get_switch_info(start_ea)

    cursor = ida.prev_head(start_ea)
    for _ in range(11):
        if ida.is_switch_insn(cursor):
            mnem = ida.get_mnem(cursor)
            # dunno if there are other common switch idioms
            if mnem == "sltiu":
                return (mnem, ida.get_op(cursor, 1), 1)
        else:
            branch_refs = list(ida.code_refs_from(cursor, 0))
            if branch_refs:
                span = (ida.atoa(cursor), ida.atoa(start_ea))
                raise SwitchTroubleError(
                    "encountered branch/jump while looking for switch value "
                    + "between %s..%s" % span
                )
        cursor = ida.prev_head(cursor)

    # lookback window exhausted without finding the switch value
    raise SwitchTroubleError(
        "gave up looking for switch value between %s..%s"
        % (ida.atoa(cursor), ida.atoa(start_ea))
    )
def get_swval(start_ea):
    '''ea_t -> (str, op_ty, int)

    Scan backwards from the switch instruction at start_ea for the
    instruction that tests the switch value.

    Returns a tuple of the mnemonic, operand 1 of the matching instruction
    (as produced by ida.get_op), and the literal 1.  Only the `sltiu` idiom
    is recognised.

    Raises SwitchTroubleError when a non-switch instruction that has code
    references (ida.code_refs_from(ea, 0)) is met first, or when no match is
    found among the 11 instructions preceding start_ea.
    '''
    # XXX imperative
    # called only to confirm this insn has switch info; the result is unused
    ida.get_switch_info(start_ea)

    cursor = ida.prev_head(start_ea)
    for _ in range(11):
        if ida.is_switch_insn(cursor):
            mnem = ida.get_mnem(cursor)
            # dunno if there are other common switch idioms
            if mnem == 'sltiu':
                return (mnem, ida.get_op(cursor, 1), 1)
        else:
            branch_refs = list(ida.code_refs_from(cursor, 0))
            if branch_refs:
                span = (ida.atoa(cursor), ida.atoa(start_ea))
                raise SwitchTroubleError(
                    'encountered branch/jump while looking for switch value ' +
                    'between %s..%s' % span)
        cursor = ida.prev_head(cursor)

    # lookback window exhausted without finding the switch value
    raise SwitchTroubleError(
        'gave up looking for switch value between %s..%s' %
        (ida.atoa(cursor), ida.atoa(start_ea)))
def get_op_addrmode(ea, op, cmd):
    '''ea_t -> int -> insn_t -> op_ret

    Build the op_ret for an immediate or displacement operand of the
    instruction at ea.

    ea  -- address of the instruction
    op  -- operand index; remapped through ida.mips_op_hack before use
    cmd -- decoded instruction, indexable by operand number

    Raises utils.BugError if the operand is neither o_imm nor o_displ, or if
    a displacement operand names a register number outside 0..63.
    '''
    # the ida module calls back into this module to deal with some
    # MIPS-specific operand handling here
    mnem = ida.get_mnem(ea)
    op = ida.mips_op_hack(cmd, op)
    operand = cmd[op]

    if operand.type == ida.o_imm:
        val = operand.value
    elif operand.type == ida.o_displ:
        val = operand.addr
    else:
        raise utils.BugError('neither imm nor displ passed to get_op_addrmode')

    if mnem in insns.has_special_opnd:
        # the raw value doubles as the target
        return ida.resolve_opnd(val, val)

    # addiu is often used for address calculation, which IDA will resolve to
    # a name, so handle addiu's immval only if we fail to resolve it later
    if mnem != 'addiu' and mnem in insns.has_imm:
        return ida.op_ret(ida.op_ty.value, immval(val), 0)

    target = ida.calc_target(ea, ea, op, immval(val))

    if target == ida.BADADDR and operand.type == ida.o_displ:
        # no resolvable target: emit a register + displacement operand
        regnum = operand.reg
        if 0 <= regnum <= 31:
            base = regs.gpr(regnum)
        elif 32 <= regnum <= 63:
            base = regs.fpr(regnum)
        else:
            raise utils.BugError('bogus register %u' % regnum)
        return ida.op_ret(ida.op_ty.displ,
                          ida.displ(reg=base, displ=immval(val)),
                          0)

    resolved = ida.resolve_opnd(target, val)
    if mnem == 'addiu' and resolved.ty == ida.op_ty.value:
        # addiu is being used for regular addition; handle its third
        # operand as an immediate value
        return ida.op_ret(ida.op_ty.value, immval(resolved.val), 0)
    return resolved
def get_op_addrmode(ea, op, cmd):
    """ea_t -> int -> insn_t -> op_ret

    Build the op_ret for an immediate or displacement operand of the
    instruction at ea.

    ea  -- address of the instruction
    op  -- operand index; remapped through ida.mips_op_hack before use
    cmd -- decoded instruction, indexable by operand number

    Raises utils.BugError if the operand is neither o_imm nor o_displ, or if
    a displacement operand names a register number outside 0..63.
    """
    # the ida module calls back into this module to deal with some
    # MIPS-specific operand handling here
    mnem = ida.get_mnem(ea)
    op = ida.mips_op_hack(cmd, op)
    operand = cmd[op]

    if operand.type == ida.o_imm:
        val = operand.value
    elif operand.type == ida.o_displ:
        val = operand.addr
    else:
        raise utils.BugError("neither imm nor displ passed to get_op_addrmode")

    if mnem in insns.has_special_opnd:
        # the raw value doubles as the target
        return ida.resolve_opnd(val, val)

    # addiu is often used for address calculation, which IDA will resolve to
    # a name, so handle addiu's immval only if we fail to resolve it later
    if mnem != "addiu" and mnem in insns.has_imm:
        return ida.op_ret(ida.op_ty.value, immval(val), 0)

    target = ida.calc_target(ea, ea, op, immval(val))

    if target == ida.BADADDR and operand.type == ida.o_displ:
        # no resolvable target: emit a register + displacement operand
        regnum = operand.reg
        if 0 <= regnum <= 31:
            base = regs.gpr(regnum)
        elif 32 <= regnum <= 63:
            base = regs.fpr(regnum)
        else:
            raise utils.BugError("bogus register %u" % regnum)
        return ida.op_ret(
            ida.op_ty.displ,
            ida.displ(reg=base, displ=immval(val)),
            0,
        )

    resolved = ida.resolve_opnd(target, val)
    if mnem == "addiu" and resolved.ty == ida.op_ty.value:
        # addiu is being used for regular addition; handle its third
        # operand as an immediate value
        return ida.op_ret(ida.op_ty.value, immval(resolved.val), 0)
    return resolved
def fmt_insn(ea, our_fns, extern_reg_map, stkvars, from_delay):
    '''ea_t -> frozenset(str) -> {str : reg_sig} -> {int : c_ast()} -> (ea_t, str)

    Translate the instruction at ea into a C AST statement.

    ea             -- address of the instruction to format
    our_fns        -- callees found in this collection are emitted via
                      ep_ct.internal_call; all others are treated as external
    extern_reg_map -- maps an external callee to the signature handed to
                      extern_call; a missing entry prints a notice and emits
                      ep_ct.do_nop() instead of the call
    stkvars        -- passed through to ida.get_opvals
    from_delay     -- unless this is exactly False, no labels are attached to
                      the generated statements; the recursive call that
                      formats a delay slot passes True

    Returns whatever next_ea_and_c builds from the address to continue at and
    a one-element list holding the generated statement.  The recursive call
    below unpacks that result as a pair whose second item is a list.

    Raises utils.BugError for `usefn` instructions this function does not
    know how to emit.
    '''
    # XXX this function is too long and its interaction with the formatter
    # steps is not very clear
    # NOTE mutation in a few places
    #
    # we cannot rely simply on IDA's disassembly when generating C.  e.g.:
    #
    # .text:100052F4    lwc1    $f12, (square - 0x10008E50)($s1)
    #
    # THIS means f12 = square[0] (square is declared as an array).  but...
    #
    # .text:100041A4    lw      $a1, (seqList - 0x1000BF78)($a1)
    #
    # THIS means a1 = seqList--NOT *seqList or seqList[0].  GetOperand and
    # similar functions are thus useless for our purposes.  unfortunately, we
    # have no choice but to handle C's type system in order to emit C from
    # disassembly.  we don't COMPLETELY handle it (patches welcome!!!1), but
    # we do achieve enough that with minor database annotations we have a POC
    # in our chosen target.
    def labelize(ea, stmt):
        # wrap stmt in a c_ast.Label when ea has a non-empty name; statements
        # formatted on behalf of a delay slot (from_delay) are never labelled
        # here
        if from_delay is False:
            label = ida.name(ea)
            if label != '':
                return c_ast.Label(label, stmt)
            else:
                return stmt
        else:
            return stmt

    fn = ida.get_func(ea)
    # NOTE(review): fn_name is not referenced anywhere below
    fn_name = ida.get_func_name(ea)
    fn_end = fn.endEA
    mnem = ida.get_mnem(ea)
    insn = insns.insns[mnem]
    is_delayed = mnem in insns.delayed
    delay_ea = ida.next_head(ea, fn_end)
    # a delayed instruction also consumes its delay slot, so the next
    # instruction to visit is the one after the slot
    next_ea = (delay_ea
               if is_delayed is False
               else ida.next_head(delay_ea, fn_end))

    if ida.is_switch_insn(ea) is True:
        # don't emit stmts that IDA marks as being part of a switch idiom
        #
        # pass delay_ea as the next ea to check, because we may have a case in
        # which a non-switch insn follows a delayed switch insn
        return next_ea_and_c(delay_ea, [labelize(ea, c_ast.EmptyStatement())])

    opvals = ida.get_opvals(ea, stkvars)

    # addiu has many forms, some of which require transformation into
    # two-operand statements, others which need to be kept as three-operand
    # statements, so we have to handle it here, not fmt_op
    #
    # we can elide a previous modification to that register within a basic
    # block if it has no uses between a modification and the addiu, though we
    # don't yet do this
    if mnem == 'addiu' and opvals[-1].ty != ida.op_ty.value:
        # handles cases where addiu is effectively an assignment (e.g. when
        # used for address calculation)

        # first op is always a register
        reg = fmt_op(opvals[0], mnem, 0)
        # any non-number as the final operand should be handled according to
        # fmt_op's usual rules
        arg = fmt_op(opvals[-1], mnem)
        assign = labelize(ea, ep_ct.do_assign(
            rt=reg, op=ep_ct.cast_to_dest_reg(insn, arg)))
        return next_ea_and_c(next_ea, [assign])

    if mnem == 'trunc.w.d':
        # emulate trunc.w.d with our function
        vals = [fmt_reg(mnem, opvals[0].val, insn.result),
                fmt_reg(mnem, opvals[1].val, insn.slots[0])]
        return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(
            insn.subst, ret_reg=vals[0],
            args=ep_ct.args_for_call([vals[1]])))])
    elif mnem in ['jalr', 'jr']:
        # jalr and jr need special handling
        vals = []
    else:
        vals = list(fmt_op(x, mnem, op) for (op, x) in enumerate(opvals))

    if insn.ty == insns.types.usefn:
        if insn.subst == 'memcpy':
            # this should be redesigned to not use memcpy just to make the
            # generated code a little nicer, but the complexity hasn't been
            # worth it.  the issue is: the fact that [ls][dw]c1 move data
            # between the fpu and memory is no guarantee that the data held in
            # an fpu register is actually a float or a double, which
            # complicates the logic a little bit.  fortunately, we can just
            # use memcpy instead, and modern compilers will inline it so that
            # it's equivalent to a load/store for small sizes.
            if mnem in ['ldc1', 'sdc1']:
                size = 8
            elif mnem in ['lwc1', 'swc1']:
                size = 4
            else:
                raise utils.BugError('unhandled usefn insn %s' % mnem)

            # need to swap the order of arguments for a store, since loads and
            # stores are written in the same direction, but they aren't in C!
            args = list(reversed(vals) if mnem.startswith('s') else vals)
            return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(
                insn.subst,
                args=ep_ct.args_for_call([args[0], args[1],
                                          c_ast.Constant('int', str(size))])))])
        else:
            raise utils.BugError('unhandled usefn instruction %s' % mnem)
    else:
        args = make_args_for_formatter(insn, vals)

    if is_delayed is True:
        # format our delayed instruction before processing this instruction--but
        # see below for an important note about the case of branch likely
        (_, delay_slot) = fmt_insn(delay_ea, our_fns, extern_reg_map, stkvars,
                                   from_delay=True)
        # branch target
        loc = opvals[-1].val

        if mnem == 'jr':
            delayed = do_switch_or_return(ea)
        elif insn.ty == insns.types.call:
            callee = data.get_callee(ea, mnem, args)
            if callee in our_fns:
                delayed = ep_ct.internal_call(callee)
            else:
                # external function call
                try:
                    sig = extern_reg_map[callee]
                except KeyError:
                    # XXX we should really modify pycparser to allow insertion
                    # of comments, as it would make the emitted code much
                    # easier to follow.  just alert the user that we couldn't
                    # make some calls for now
                    print ('/* %s: no regmap info, emitting empty stmt at %s */'
                           % (callee, ida.atoa(ea)))
                    delayed = ep_ct.do_nop()
                else:
                    delayed = extern_call(callee, sig, mnem, ea)
        else:
            # some other sort of delayed insn
            delayed = get_formatter(mnem)(**args)

        goto = c_ast.Goto(loc)

        if insns.subtypes.likely in insn.subty:
            # for branch likely, the delay slot is NOT executed if the branch is
            # not taken
            #
            # the taken arm is: delay slot stmt(s), the goto, then
            # labelize(delay_ea, delay_slot[0]) placed after the goto
            delayed.iftrue = c_ast.Compound(
                delay_slot + [goto, labelize(delay_ea, delay_slot[0])])
            ret = labelize(ea, delayed)
        else:
            if insn.ty in [insns.types.branch, insns.types.fbranch]:
                # ordinary branch: the delay slot statement appears on both
                # arms--before the goto when taken, alone (and labelled)
                # otherwise
                delayed.iftrue = c_ast.Compound(delay_slot + [goto])
                delayed.iffalse = labelize(delay_ea, delay_slot[0])
                ret = labelize(ea, delayed)
            else:
                # not a branch: emit the delay slot statement first, then the
                # delayed statement itself
                ret = labelize(ea, c_ast.Compound(
                    [labelize(delay_ea, delay_slot[0]), delayed]))
        return next_ea_and_c(next_ea, [ret])

    return next_ea_and_c(next_ea, [labelize(ea, get_formatter(mnem)(**args))])