Example #1
0
 def simple_2op(insn, opvals):
     '''Build the keyword arguments for a two-operand statement.

     insn is the instruction descriptor; its .insn (mnemonic) and .result
     attributes are read here.  opvals is indexed at 0 (returned as 'rt') and
     at 1 (returned as 'op').

     Returns a dict with exactly the keys 'result', 'rt' and 'op'.
     '''
     # cast the rvalue to the lvalue's type to avoid a warning for la
     # XXX do lw too?
     #
     # XXX we should also handle sw here, but op_ty doesn't carry enough
     # information to be able to do this.  see decomp.ida for why op_ty
     # sucks.  we need to be able to cast to the type of the lvalue, but
     # op_ty's knowledge of types isn't expressive enough
     if insn.insn == 'la':
         op = ep_ct.cast_to_dest_reg(insn, opvals[1])
     else:
         op = opvals[1]
     # a plain dict display: same keys/values as zipping two literal lists,
     # without depending on the Python-2-only itertools.izip
     return {'result': insn.result, 'rt': opvals[0], 'op': op}
Example #2
0
def fmt_insn(ea, our_fns, extern_reg_map, stkvars, from_delay):
    '''Format the instruction at ea as C AST statement(s).

    ea_t -> frozenset(str) -> {str : reg_sig} -> {int : c_ast()} -> (ea_t, str)

    NOTE(review): the signature line above does not list from_delay, and the
    second element of the result is used below as a list of statements rather
    than a str -- confirm against next_ea_and_c.

    Parameters:
      ea              address of the instruction to format
      our_fns         container tested with "callee in our_fns"; callees found
                      in it are emitted via ep_ct.internal_call
      extern_reg_map  mapping indexed by callee name for external calls; a
                      missing entry (KeyError) yields a nop plus a printed
                      notice
      stkvars         passed through to ida.get_opvals
      from_delay      anything other than the False singleton suppresses the
                      label that labelize would otherwise attach; the
                      recursive delay-slot call passes True

    Returns whatever next_ea_and_c(address, [statement]) returns.  The
    recursive call below unpacks that as a pair and treats the second element
    as a list of statements.

    Raises utils.BugError for an insns.types.usefn instruction whose
    substitute is not 'memcpy', or whose mnemonic is not one of
    ldc1/sdc1/lwc1/swc1.
    '''
    # XXX this function is too long and its interaction with the formatter steps
    # is not very clear
    # NOTE mutation in a few places
    #
    # we cannot rely simply on IDA's disassembly when generating C.  e.g.:
    #
    # .text:100052F4                 lwc1    $f12, (square - 0x10008E50)($s1)
    #
    # THIS means f12 = square[0] (square is declared as an array).  but...
    #
    # .text:100041A4                 lw      $a1, (seqList - 0x1000BF78)($a1)
    #
    # THIS means a1 = seqList--NOT *seqList or seqList[0].  GetOperand and
    # similar functions are thus useless for our purposes.  unfortunately, we
    # have no choice but to handle C's type system in order to emit C from
    # disassembly.  we don't COMPLETELY handle it (patches welcome!!!1), but we
    # do achieve enough that with minor database annotations we have a POC in
    # our chosen target.
    def labelize(ea, stmt):
        # wrap stmt in a c_ast.Label when ida.name(ea) is non-empty; when this
        # fmt_insn call is formatting a delay slot (from_delay is not False)
        # stmt is returned unlabelled, and the call that requested the delay
        # slot applies labelize(delay_ea, ...) itself
        if from_delay is False:
            label = ida.name(ea)
            if label != '':
                return c_ast.Label(label, stmt)
            else:
                return stmt
        else:
            return stmt

    fn = ida.get_func(ea)
    # NOTE(review): fn_name is assigned but not read again in this function
    fn_name = ida.get_func_name(ea)
    # fn_end bounds the next_head lookups below
    fn_end = fn.endEA
    mnem = ida.get_mnem(ea)
    insn = insns.insns[mnem]
    is_delayed = mnem in insns.delayed
    # delay_ea is the head following ea; for a delayed instruction it is the
    # delay slot, so next_ea skips one head further
    delay_ea = ida.next_head(ea, fn_end)
    next_ea = (delay_ea
               if is_delayed is False
               else ida.next_head(delay_ea, fn_end))

    if ida.is_switch_insn(ea) is True:
        # don't emit stmts that IDA marks as being part of a switch idiom
        #
        # pass delay_ea as the next ea to check, because we may have a case in
        # which a non-switch insn follows a delayed switch insn
        return next_ea_and_c(delay_ea, [labelize(ea, c_ast.EmptyStatement())])

    opvals = ida.get_opvals(ea, stkvars)

    # addiu has many forms, some of which require transformation into
    # two-operand statements, others which need to be kept as three-operand
    # statements, so we have to handle it here, not fmt_op
    #
    # we can elide a previous modification to that register within a basic
    # block if it has no uses between a modification and the addiu, though we
    # don't yet do this
    if mnem == 'addiu' and opvals[-1].ty != ida.op_ty.value:
        # handles cases where addiu is effectively an assignment (e.g. when
        # used for address calculation)

        # first op is always a register
        reg = fmt_op(opvals[0], mnem, 0)
        # any non-number as the final operand should be handled according to
        # fmt_op's usual rules
        arg = fmt_op(opvals[-1], mnem)
        assign = labelize(ea, ep_ct.do_assign(rt=reg, op=ep_ct.cast_to_dest_reg(insn, arg)))
        return next_ea_and_c(next_ea, [assign])

    if mnem == 'trunc.w.d':
        # emulate trunc.w.d with our function
        vals = [fmt_reg(mnem, opvals[0].val, insn.result), fmt_reg(mnem, opvals[1].val, insn.slots[0])]
        return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(insn.subst, ret_reg=vals[0], args=ep_ct.args_for_call([vals[1]])))])
    elif mnem in ['jalr', 'jr']:
        # jalr and jr need special handling
        # (their operands are not run through fmt_op; an empty list is what
        # reaches make_args_for_formatter below)
        vals = []
    else:
        # format every operand, passing its position as the third argument
        vals = list(fmt_op(x, mnem, op) for (op, x) in enumerate(opvals))

    if insn.ty == insns.types.usefn:
        # instructions emitted as a call to the function named by insn.subst;
        # only 'memcpy' is handled, and this branch always returns or raises
        if insn.subst == 'memcpy':
            # this should be redesigned to not use memcpy just to make the
            # generated code a little nicer, but the complexity hasn't been
            # worth it.  the issue is: the fact that [ls][dw]c1 move data
            # between the fpu and memory is no guarantee that the data held in
            # an fpu register is actually a float or a double, which complicates
            # the logic a little bit.  fortunately, we can just use memcpy
            # instead, and modern compilers will inline it so that it's
            # equivalent to a load/store for small sizes.
            if mnem in ['ldc1', 'sdc1']:
                size = 8
            elif mnem in ['lwc1', 'swc1']:
                size = 4
            else:
                raise utils.BugError('unhandled usefn insn %s' % mnem)

            # need to swap the order of arguments for a store, since loads and
            # stores are written in the same direction, but they aren't in C!
            args = list(reversed(vals) if mnem.startswith('s') else vals)
            return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(insn.subst, args=ep_ct.args_for_call([args[0], args[1], c_ast.Constant('int', str(size))])))])
        else:
            raise utils.BugError('unhandled usefn instruction %s' % mnem)
    else:
        # keyword arguments later splatted into the formatter for this mnemonic
        args = make_args_for_formatter(insn, vals)

    if is_delayed is True:
        # format our delayed instruction before processing this instruction--but
        # see below for an important note about the case of branch likely
        (_, delay_slot) = fmt_insn(delay_ea, our_fns, extern_reg_map, stkvars, from_delay=True)
        # branch target
        loc = opvals[-1].val

        if mnem == 'jr':
            delayed = do_switch_or_return(ea)
        elif insn.ty == insns.types.call:
            callee = data.get_callee(ea, mnem, args)

            if callee in our_fns:
                delayed = ep_ct.internal_call(callee)
            else:  # external function call
                try:
                    sig = extern_reg_map[callee]
                except KeyError:
                    # XXX we should really modify pycparser to allow insertion
                    # of comments, as it would make the emitted code much easier
                    # to follow.  just alert the user that we couldn't make some
                    # calls for now
                    print ('/* %s: no regmap info, emitting empty stmt at %s */' % (callee, ida.atoa(ea)))
                    delayed = ep_ct.do_nop()
                else:
                    delayed = extern_call(callee, sig, mnem, ea)
        else:  # some other sort of delayed insn
            delayed = get_formatter(mnem)(**args)

        # built unconditionally, but only placed into the output by the two
        # branch cases below
        goto = c_ast.Goto(loc)
        # NOTE(review): the two branch cases assign .iftrue/.iffalse on
        # `delayed`, so it is presumably a c_ast.If produced by the formatter
        # -- confirm
        if insns.subtypes.likely in insn.subty:
            # for branch likely, the delay slot is NOT executed if the branch is
            # not taken
            #
            # so the delay slot lives only in the taken arm, followed by the
            # goto; the labelled copy of its first statement is appended after
            # the goto, presumably so the label for delay_ea is still emitted
            delayed.iftrue = c_ast.Compound(delay_slot + [goto, labelize(delay_ea, delay_slot[0])])
            ret = labelize(ea, delayed)
        else:
            if insn.ty in [insns.types.branch, insns.types.fbranch]:
                # ordinary branch: the delay slot appears in both arms--before
                # the goto when taken, and as the (labelled) else arm otherwise
                # NOTE(review): only delay_slot[0] is used for the else arm
                delayed.iftrue = c_ast.Compound(delay_slot + [goto])
                delayed.iffalse = labelize(delay_ea, delay_slot[0])
                ret = labelize(ea, delayed)
            else:
                # any other delayed insn: emit the labelled delay-slot
                # statement first, then the delayed statement itself
                ret = labelize(ea, c_ast.Compound([labelize(delay_ea, delay_slot[0]), delayed]))
        return next_ea_and_c(next_ea, [ret])

    # non-delayed instruction: one labelled statement from its formatter
    return next_ea_and_c(next_ea, [labelize(ea, get_formatter(mnem)(**args))])