def get_swval(start_ea):
    '''ea_t -> (str, op_ty, int)

    Scan backwards from the switch jump at start_ea for the instruction that
    supplies the switch value.

    Returns (mnemonic, operand 1 of that insn, 1) for the first 'sltiu' found
    among the instructions IDA flags as belonging to a switch idiom.

    Raises SwitchTroubleError when an instruction that is not part of a switch
    idiom has outgoing code references, or when 11 instructions have been
    inspected without success.
    '''
    # XXX imperative
    # called only for its validation effect: confirm this insn has switch info
    ida.get_switch_info(start_ea)

    cur = ida.prev_head(start_ea)
    # the original budget: instructions at distance 0..10 are inspected
    for _ in range(11):
        if ida.is_switch_insn(cur):
            # dunno if there are other common switch idioms
            insn_name = ida.get_mnem(cur)
            if insn_name == 'sltiu':
                return (insn_name, ida.get_op(cur, 1), 1)
        elif list(ida.code_refs_from(cur, 0)):
            span = (ida.atoa(cur), ida.atoa(start_ea))
            raise SwitchTroubleError(
                'encountered branch/jump while looking for switch value ' +
                'between %s..%s' % span)
        cur = ida.prev_head(cur)

    raise SwitchTroubleError(
        'gave up looking for switch value between %s..%s'
        % (ida.atoa(cur), ida.atoa(start_ea)))
def get_swval(start_ea):
    """ea_t -> (str, op_ty, int)

    Scan backwards from the switch jump at start_ea for the instruction that
    supplies the switch value.

    Returns (mnemonic, operand 1 of that insn, 1) for the first "sltiu" found
    among the instructions IDA flags as belonging to a switch idiom.

    Raises SwitchTroubleError when an instruction that is not part of a switch
    idiom has outgoing code references, or when 11 instructions have been
    inspected without success.
    """
    # XXX imperative
    # called only for its validation effect: confirm this insn has switch info
    ida.get_switch_info(start_ea)

    cur = ida.prev_head(start_ea)
    # the original budget: instructions at distance 0..10 are inspected
    for _ in range(11):
        if ida.is_switch_insn(cur):
            # dunno if there are other common switch idioms
            insn_name = ida.get_mnem(cur)
            if insn_name == "sltiu":
                return (insn_name, ida.get_op(cur, 1), 1)
        elif list(ida.code_refs_from(cur, 0)):
            span = (ida.atoa(cur), ida.atoa(start_ea))
            raise SwitchTroubleError(
                "encountered branch/jump while looking for switch value "
                + "between %s..%s" % span
            )
        cur = ida.prev_head(cur)

    raise SwitchTroubleError(
        "gave up looking for switch value between %s..%s"
        % (ida.atoa(cur), ida.atoa(start_ea))
    )
def get_arg_for_va_function(callee, start_ea):
    '''str -> ea_t -> str

    Find the string that a call to a known variadic function needs in order
    to be processed (presumably its format string -- confirm with callers).

    callee   -- name of the called function; must be one of 'printf',
                'scanf' or 'sscanf'
    start_ea -- address of the call instruction

    The search begins in the call's delay slot and then walks backwards over
    the instructions before the call.  For each instruction whose operand 0
    is the argument register of interest, the last operand's target is looked
    up as a string; the first non-None string is returned.

    Raises utils.BugError for an unrecognized callee, and VarargsError when a
    branch/jump is met or the search budget is exhausted.
    '''
    # XXX hacky; not a very general function
    # XXX imperative
    # get a relevant item needed for processing a variadic function
    sw = {
        'printf': regs.gpr(abi.arg_regs[0]),
        'scanf': regs.gpr(abi.arg_regs[0]),
        'sscanf': regs.gpr(abi.arg_regs[1]),
    }
    try:
        wanted_reg = sw[callee]
    except KeyError:
        raise utils.BugError('unrecognized callee %s' % callee)

    distance = 0
    fn = ida.get_func(start_ea)
    # first, look at the delay slot
    ea = ida.next_head(start_ea, fn.endEA)

    while True:
        if distance > 10:
            # adjacent literals are joined before '%' is applied, so the
            # callee name really is interpolated into the message
            raise VarargsError(
                'gave up looking for needed varargs argument for %s between '
                '%s..%s' % (callee, ida.atoa(ea), ida.atoa(start_ea)))

        if ea == start_ea:
            # skip the call insn (does not count against the budget)
            ea = ida.prev_head(ea)
            continue
        elif list(ida.code_refs_from(ea, 0)) != []:
            raise VarargsError(
                'encountered branch/jump while looking for varargs argument ' +
                'between %s..%s' % (ida.atoa(ea), ida.atoa(start_ea)))

        rd = ida.get_op(ea, 0)
        if rd.val == wanted_reg:
            opvals = ida.get_opvals(ea)
            # XXX should try to track stkvar values
            s = ida.get_string(opvals[-1].target)
            if s is not None:
                return s

        ea = ida.prev_head(ea)
        distance += 1
def get_arg_for_va_function(callee, start_ea):
    """str -> ea_t -> str

    Find the string that a call to a known variadic function needs in order
    to be processed (presumably its format string -- confirm with callers).

    callee   -- name of the called function; must be one of "printf",
                "scanf" or "sscanf"
    start_ea -- address of the call instruction

    The search begins in the call's delay slot and then walks backwards over
    the instructions before the call.  For each instruction whose operand 0
    is the argument register of interest, the last operand's target is looked
    up as a string; the first non-None string is returned.

    Raises utils.BugError for an unrecognized callee, and VarargsError when a
    branch/jump is met or the search budget is exhausted.
    """
    # XXX hacky; not a very general function
    # XXX imperative
    # get a relevant item needed for processing a variadic function
    sw = {
        "printf": regs.gpr(abi.arg_regs[0]),
        "scanf": regs.gpr(abi.arg_regs[0]),
        "sscanf": regs.gpr(abi.arg_regs[1]),
    }
    try:
        wanted_reg = sw[callee]
    except KeyError:
        raise utils.BugError("unrecognized callee %s" % callee)

    distance = 0
    fn = ida.get_func(start_ea)
    # first, look at the delay slot
    ea = ida.next_head(start_ea, fn.endEA)

    while True:
        if distance > 10:
            # adjacent literals are joined before "%" is applied, so the
            # callee name really is interpolated into the message
            raise VarargsError(
                "gave up looking for needed varargs argument for %s between "
                "%s..%s" % (callee, ida.atoa(ea), ida.atoa(start_ea))
            )

        if ea == start_ea:
            # skip the call insn (does not count against the budget)
            ea = ida.prev_head(ea)
            continue
        elif list(ida.code_refs_from(ea, 0)) != []:
            raise VarargsError(
                "encountered branch/jump while looking for varargs argument "
                + "between %s..%s" % (ida.atoa(ea), ida.atoa(start_ea))
            )

        rd = ida.get_op(ea, 0)
        if rd.val == wanted_reg:
            opvals = ida.get_opvals(ea)
            # XXX should try to track stkvar values
            s = ida.get_string(opvals[-1].target)
            if s is not None:
                return s

        ea = ida.prev_head(ea)
        distance += 1
def get_callee(ea, mnem, args):
    """ea_t -> str -> formatter_args -> str

    Return the name of the function called by the call instruction at ea.

    For "jal" the name is taken from the formatter argument "rs".  For "jalr"
    the target is read from the "$ mips" netnode entry for ea.

    Raises FunctionPointerError when no target is recorded for a "jalr", and
    utils.BugError for any other mnemonic.
    """
    # XXX we don't handle function pointers yet, but it should be easy now that
    # we handle all internal functions as having the same signature...i think
    if mnem == "jal":
        return args["rs"].name
    if mnem != "jalr":
        raise utils.BugError("unhandled call insn: %s" % mnem)

    # the stored value is offset by one, so a stored 0 yields -1 here
    # (presumably meaning "no entry" -- confirm against the netnode docs)
    target = ida.netnode("$ mips").altval(ea) - 1
    if target == -1:
        raise FunctionPointerError("unknown target for function pointer at %s" % ida.atoa(ea))
    return ida.name(target)
def get_callee(ea, mnem, args):
    '''ea_t -> str -> formatter_args -> str

    Return the name of the function called by the call instruction at ea.

    For 'jal' the name is taken from the formatter argument 'rs'.  For 'jalr'
    the target is read from the '$ mips' netnode entry for ea.

    Raises FunctionPointerError when no target is recorded for a 'jalr', and
    utils.BugError for any other mnemonic.
    '''
    # XXX we don't handle function pointers yet, but it should be easy now that
    # we handle all internal functions as having the same signature...i think
    if mnem == 'jal':
        return args['rs'].name
    if mnem != 'jalr':
        raise utils.BugError('unhandled call insn: %s' % mnem)

    # the stored value is offset by one, so a stored 0 yields -1 here
    # (presumably meaning "no entry" -- confirm against the netnode docs)
    target = ida.netnode('$ mips').altval(ea) - 1
    if target == -1:
        raise FunctionPointerError('unknown target for function pointer at %s' % ida.atoa(ea))
    return ida.name(target)
def get_one_item(ea, ti, sz):
    '''ea_t -> tinfo_t -> int -> c.types obj | int | long?

    Read a single data item of type ti and byte size sz located at ea.

    Floats and doubles are wrapped with ep_ct.cfloat / ep_ct.cdouble; any
    other type is read with the ida accessor matching sz (1, 2, 4 or 8).

    Raises UnknownDataTypeError when sz is not one of the supported sizes.
    '''
    if ti.is_float():
        return ep_ct.cfloat(ida.get_float(ea))
    if ti.is_double():
        return ep_ct.cdouble(ida.get_double(ea))

    # NOTE this gets the SOURCE'S size for a given type, which may not
    # match the target's
    readers = {1: ida.byte, 2: ida.word, 4: ida.dword, 8: ida.qword}
    if sz not in readers:
        raise UnknownDataTypeError('unknown data type at %s' % ida.atoa(ea))
    read = readers[sz]
    return read(ea)
def get_one_item(ea, ti, sz):
    '''ea_t -> tinfo_t -> int -> c.types obj | int | long?

    Read a single data item of type ti and byte size sz located at ea.

    Floats and doubles are wrapped with ep_ct.cfloat / ep_ct.cdouble; any
    other type is read with the ida accessor matching sz (1, 2, 4 or 8).

    Raises UnknownDataTypeError when sz is not one of the supported sizes.
    '''
    if ti.is_float():
        return ep_ct.cfloat(ida.get_float(ea))
    if ti.is_double():
        return ep_ct.cdouble(ida.get_double(ea))

    # NOTE this gets the SOURCE'S size for a given type, which may not
    # match the target's
    readers = {
        1: ida.byte,
        2: ida.word,
        4: ida.dword,
        8: ida.qword,
    }
    if sz not in readers:
        raise UnknownDataTypeError('unknown data type at %s' % ida.atoa(ea))
    read = readers[sz]
    return read(ea)
def get_callee(ea, mnem, args):
    '''ea_t -> str -> formatter_args -> str

    Return the name of the function called by the call instruction at ea.

    For 'jal' the name is taken from the formatter argument 'rs'.  For 'jalr'
    the target is read from the '$ mips' netnode entry for ea.

    Raises FunctionPointerError when no target is recorded for a 'jalr', and
    utils.BugError for any other mnemonic.
    '''
    # XXX we don't handle function pointers yet, but it should be easy now that
    # we handle all internal functions as having the same signature...i think
    if mnem == 'jal':
        return args['rs'].name
    if mnem != 'jalr':
        raise utils.BugError('unhandled call insn: %s' % mnem)

    # the stored value is offset by one, so a stored 0 yields -1 here
    # (presumably meaning "no entry" -- confirm against the netnode docs)
    target = ida.netnode('$ mips').altval(ea) - 1
    if target == -1:
        raise FunctionPointerError(
            'unknown target for function pointer at %s' % ida.atoa(ea))
    return ida.name(target)
def do_switch_or_return(ea):
    '''Build the C AST statement for the jump-register instruction at ea.

    A return instruction becomes a bare c_ast.Return.  Anything else is
    treated as a switch: every entry of the switch table becomes a
    "case <key>: goto <label>;" and the default target a
    "default: goto <label>;", all wrapped in a c_ast.Switch on the value
    located by data.get_swval.

    Raises utils.BugError when ea is neither a return nor a known switch.
    '''
    if ida.is_ret_insn(ea):
        return c_ast.Return(None)

    # switch
    try:
        table = ida.switch_cases(ea)
    except ida.NoSwitchError:
        raise utils.BugError('unhandled jr at ea %s' % ida.atoa(ea))

    default_stmts = [c_ast.Default([c_ast.Goto(ida.name(table.default))])]
    case_stmts = [
        c_ast.Case(c_ast.Constant('int', str(key)),
                   [c_ast.Goto(ida.name(dest))])
        for key, dest in table.cases.iteritems()
    ]

    mnem, opnd, opn = data.get_swval(ea)
    switch_value = fmt_op(opnd, mnem, opn)
    body = c_ast.Compound(case_stmts + default_stmts)
    return c_ast.Switch(switch_value, body)
def fmt_insn(ea, our_fns, extern_reg_map, stkvars, from_delay):
    '''ea_t -> frozenset(str) -> {str : reg_sig} -> {int : c_ast()} -> (ea_t, str)

    Translate the instruction at ea into C AST statement(s).

    ea             -- address of the instruction to format
    our_fns        -- names of functions emitted as internal calls
    extern_reg_map -- maps an external callee name to its register signature
    stkvars        -- stack variable information handed to ida.get_opvals
    from_delay     -- True when ea is being formatted as the delay slot of
                      the preceding instruction; labels are then left to
                      the caller

    Returns whatever next_ea_and_c builds from the next address to process
    and a list of statements (the recursive call below unpacks it as a pair
    whose second element is a list).

    Raises utils.BugError for 'usefn' instructions it does not know how to
    handle; helpers it calls may raise further errors.
    '''
    # XXX this function is too long and its interaction with the formatter steps
    # is not very clear
    # NOTE mutation in a few places
    #
    # we cannot rely simply on IDA's disassembly when generating C. e.g.:
    #
    # .text:100052F4 lwc1 $f12, (square - 0x10008E50)($s1)
    #
    # THIS means f12 = square[0] (square is declared as an array). but...
    #
    # .text:100041A4 lw $a1, (seqList - 0x1000BF78)($a1)
    #
    # THIS means a1 = seqList--NOT *seqList or seqList[0]. GetOperand and
    # similar functions are thus useless for our purposes. unfortunately, we
    # have no choice but to handle C's type system in order to emit C from
    # disassembly. we don't COMPLETELY handle it (patches welcome!!!1), but we
    # do achieve enough that with minor database annotations we have a POC in
    # our chosen target.
    def labelize(ea, stmt):
        # wrap stmt in a label named after ea, unless we are formatting a
        # delay slot or ea has no name
        if from_delay is False:
            label = ida.name(ea)
            if label != '':
                return c_ast.Label(label, stmt)
            else:
                return stmt
        else:
            return stmt

    fn = ida.get_func(ea)
    # NOTE(review): fn_name is not used anywhere below
    fn_name = ida.get_func_name(ea)
    fn_end = fn.endEA
    mnem = ida.get_mnem(ea)
    insn = insns.insns[mnem]
    is_delayed = mnem in insns.delayed
    delay_ea = ida.next_head(ea, fn_end)
    # a delayed insn consumes its delay slot too, so skip past it
    next_ea = (delay_ea
               if is_delayed is False
               else ida.next_head(delay_ea, fn_end))

    if ida.is_switch_insn(ea) is True:
        # don't emit stmts that IDA marks as being part of a switch idiom
        #
        # pass delay_ea as the next ea to check, because we may have a case in
        # which a non-switch insn follows a delayed switch insn
        return next_ea_and_c(delay_ea, [labelize(ea, c_ast.EmptyStatement())])

    opvals = ida.get_opvals(ea, stkvars)

    # addiu has many forms, some of which require transformation into
    # two-operand statements, others which need to be kept as three-operand
    # statements, so we have to handle it here, not fmt_op
    #
    # we can elide a previous modification to that register within a basic
    # block if it has no uses between a modification and the addiu, though we
    # don't yet do this
    if mnem == 'addiu' and opvals[-1].ty != ida.op_ty.value:
        # handles cases where addiu is effectively an assignment (e.g. when
        # used for address calculation)
        # first op is always a register
        reg = fmt_op(opvals[0], mnem, 0)
        # any non-number as the final operand should be handled according to
        # fmt_op's usual rules
        arg = fmt_op(opvals[-1], mnem)
        assign = labelize(ea, ep_ct.do_assign(rt=reg, op=ep_ct.cast_to_dest_reg(insn, arg)))
        return next_ea_and_c(next_ea, [assign])

    if mnem == 'trunc.w.d':
        # emulate trunc.w.d with our function
        vals = [fmt_reg(mnem, opvals[0].val, insn.result),
                fmt_reg(mnem, opvals[1].val, insn.slots[0])]
        return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(insn.subst, ret_reg=vals[0], args=ep_ct.args_for_call([vals[1]])))])
    elif mnem in ['jalr', 'jr']:
        # jalr and jr need special handling
        vals = []
    else:
        vals = list(fmt_op(x, mnem, op) for (op, x) in enumerate(opvals))

    if insn.ty == insns.types.usefn:
        if insn.subst == 'memcpy':
            # this should be redesigned to not use memcpy just to make the
            # generated code a little nicer, but the complexity hasn't been
            # worth it. the issue is: the fact that [ls][dw]c1 move data
            # between the fpu and memory is no guarantee that the data held in
            # an fpu register is actually a float or a double, which complicates
            # the logic a little bit. fortunately, we can just use memcpy
            # instead, and modern compilers will inline it so that it's
            # equivalent to a load/store for small sizes.
            if mnem in ['ldc1', 'sdc1']:
                size = 8
            elif mnem in ['lwc1', 'swc1']:
                size = 4
            else:
                raise utils.BugError('unhandled usefn insn %s' % mnem)

            # need to swap the order of arguments for a store, since loads and
            # stores are written in the same direction, but they aren't in C!
            args = list(reversed(vals) if mnem.startswith('s') else vals)
            return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(insn.subst, args=ep_ct.args_for_call([args[0], args[1], c_ast.Constant('int', str(size))])))])
        else:
            raise utils.BugError('unhandled usefn instruction %s' % mnem)
    else:
        args = make_args_for_formatter(insn, vals)

    if is_delayed is True:
        # format our delayed instruction before processing this instruction--but
        # see below for an important note about the case of branch likely
        (_, delay_slot) = fmt_insn(delay_ea, our_fns, extern_reg_map, stkvars, from_delay=True)
        # branch target
        loc = opvals[-1].val
        if mnem == 'jr':
            delayed = do_switch_or_return(ea)
        elif insn.ty == insns.types.call:
            callee = data.get_callee(ea, mnem, args)
            if callee in our_fns:
                delayed = ep_ct.internal_call(callee)
            else:  # external function call
                try:
                    sig = extern_reg_map[callee]
                except KeyError:
                    # XXX we should really modify pycparser to allow insertion
                    # of comments, as it would make the emitted code much easier
                    # to follow. just alert the user that we couldn't make some
                    # calls for now
                    print ('/* %s: no regmap info, emitting empty stmt at %s */' % (callee, ida.atoa(ea)))
                    delayed = ep_ct.do_nop()
                else:
                    delayed = extern_call(callee, sig, mnem, ea)
        else:  # some other sort of delayed insn
            delayed = get_formatter(mnem)(**args)

        goto = c_ast.Goto(loc)
        if insns.subtypes.likely in insn.subty:
            # for branch likely, the delay slot is NOT executed if the branch is
            # not taken
            delayed.iftrue = c_ast.Compound(delay_slot + [goto, labelize(delay_ea, delay_slot[0])])
            ret = labelize(ea, delayed)
        else:
            if insn.ty in [insns.types.branch, insns.types.fbranch]:
                # ordinary branch: the delay slot runs on both paths, so it is
                # emitted before the goto and again as the not-taken arm
                delayed.iftrue = c_ast.Compound(delay_slot + [goto])
                delayed.iffalse = labelize(delay_ea, delay_slot[0])
                ret = labelize(ea, delayed)
            else:
                # non-branch delayed insn: emit the delay slot first, then
                # the instruction itself
                ret = labelize(ea, c_ast.Compound([labelize(delay_ea, delay_slot[0]), delayed]))
        return next_ea_and_c(next_ea, [ret])

    return next_ea_and_c(next_ea, [labelize(ea, get_formatter(mnem)(**args))])