Exemple #1
0
def get_swval(start_ea):
    '''ea_t -> (str, op_ty, int)

    Walk backwards from the instruction preceding start_ea, looking for an
    instruction that IDA marks as a switch insn and whose mnemonic is 'sltiu'.
    On success return (mnemonic, operand 1 of that instruction, 1).

    Raises SwitchTroubleError when a non-switch instruction that has code
    references is met first, or when 11 instructions have been examined
    without finding the value.
    '''
    # XXX imperative
    # called only to confirm this insn has switch info; the result is unused
    ida.get_switch_info(start_ea)

    max_distance = 10
    ea = ida.prev_head(start_ea)

    # distances 0..max_distance inclusive are examined before giving up
    for _ in range(max_distance + 1):
        if ida.is_switch_insn(ea):
            mnem = ida.get_mnem(ea)
            # dunno if there are other common switch idioms
            if mnem == 'sltiu':
                return (mnem, ida.get_op(ea, 1), 1)
        else:
            # only non-switch insns are checked for outgoing code refs
            if list(ida.code_refs_from(ea, 0)):
                raise SwitchTroubleError(
                    'encountered branch/jump while looking for switch value ' +
                    'between %s..%s' % (ida.atoa(ea), ida.atoa(start_ea)))

        ea = ida.prev_head(ea)

    raise SwitchTroubleError(
        'gave up looking for switch value between %s..%s'
        % (ida.atoa(ea), ida.atoa(start_ea)))
Exemple #2
0
def get_swval(start_ea):
    """ea_t -> (str, op_ty, int)

    Search the instructions before start_ea for the one supplying the switch
    value: a switch-marked instruction whose mnemonic is "sltiu".  Returns
    (mnemonic, operand 1 of that instruction, 1).

    Raises SwitchTroubleError if a non-switch instruction with code references
    is reached first, or if the search window is exhausted.
    """
    # XXX imperative
    ida.get_switch_info(start_ea)  # result unused; confirms switch info exists

    cursor = ida.prev_head(start_ea)
    hops = 0

    while hops <= 10:
        if ida.is_switch_insn(cursor):
            mnem = ida.get_mnem(cursor)
            # dunno if there are other common switch idioms
            if mnem == "sltiu":
                return (mnem, ida.get_op(cursor, 1), 1)
        elif list(ida.code_refs_from(cursor, 0)):
            span = (ida.atoa(cursor), ida.atoa(start_ea))
            raise SwitchTroubleError(
                "encountered branch/jump while looking for switch value " + "between %s..%s" % span
            )

        cursor = ida.prev_head(cursor)
        hops += 1

    # window exhausted without finding the switch value
    span = (ida.atoa(cursor), ida.atoa(start_ea))
    raise SwitchTroubleError("gave up looking for switch value between %s..%s" % span)
Exemple #3
0
def get_arg_for_va_function(callee, start_ea):
    '''str -> ea_t -> str

    Find the string needed to process a call to the variadic function
    `callee` made at `start_ea` (presumably its format string -- the lookup
    table below picks argument register 0 for printf/scanf and 1 for sscanf).

    The instruction after the call is examined first, then the search walks
    backwards from the call.  The first instruction whose operand 0 is the
    wanted register and whose last operand value's target resolves to a
    string yields the result.

    Raises utils.BugError for a callee not in the table, and VarargsError if
    an instruction with code references is met or the search window is
    exhausted.
    '''
    # XXX hacky; not a very general function
    # XXX imperative
    # get a relevant item needed for processing a variadic function
    sw = {
        'printf' : regs.gpr(abi.arg_regs[0]),
        'scanf' : regs.gpr(abi.arg_regs[0]),
        'sscanf' : regs.gpr(abi.arg_regs[1])
    }
    try:
        wanted_reg = sw[callee]
    except KeyError:
        raise utils.BugError('unrecognized callee %s' % callee)

    distance = 0
    fn = ida.get_func(start_ea)
    # first, look at the delay slot
    ea = ida.next_head(start_ea, fn.endEA)

    while True:
        if distance > 10:
            # the literals are joined *before* '%' is applied so that callee
            # is interpolated too; with '+' the leading %s was left verbatim
            # in the message because '%' binds tighter than '+'
            raise VarargsError(
                'gave up looking for needed varargs argument for %s between '
                '%s..%s' % (callee, ida.atoa(ea), ida.atoa(start_ea)))

        if ea == start_ea:
            ea = ida.prev_head(ea)
            continue # skip the call insn
        elif list(ida.code_refs_from(ea, 0)):
            raise VarargsError(
                'encountered branch/jump while looking for varargs argument ' +
                'between %s..%s' % (ida.atoa(ea), ida.atoa(start_ea)))

        rd = ida.get_op(ea, 0)
        if rd.val == wanted_reg:
            opvals = ida.get_opvals(ea) # XXX should try to track stkvar values
            s = ida.get_string(opvals[-1].target)
            # keep searching when the target is not a string
            if s is not None:
                return s

        ea = ida.prev_head(ea)
        distance += 1
Exemple #4
0
def get_arg_for_va_function(callee, start_ea):
    """str -> ea_t -> str

    Find the string needed to process a call to the variadic function
    `callee` made at `start_ea` (presumably its format string -- the lookup
    table below picks argument register 0 for printf/scanf and 1 for sscanf).

    The instruction after the call is examined first, then the search walks
    backwards from the call.  The first instruction whose operand 0 is the
    wanted register and whose last operand value's target resolves to a
    string yields the result.

    Raises utils.BugError for a callee not in the table, and VarargsError if
    an instruction with code references is met or the search window is
    exhausted.
    """
    # XXX hacky; not a very general function
    # XXX imperative
    # get a relevant item needed for processing a variadic function
    sw = {"printf": regs.gpr(abi.arg_regs[0]), "scanf": regs.gpr(abi.arg_regs[0]), "sscanf": regs.gpr(abi.arg_regs[1])}
    try:
        wanted_reg = sw[callee]
    except KeyError:
        raise utils.BugError("unrecognized callee %s" % callee)

    distance = 0
    fn = ida.get_func(start_ea)
    # first, look at the delay slot
    ea = ida.next_head(start_ea, fn.endEA)

    while True:
        if distance > 10:
            # the literals are joined *before* "%" is applied so that callee
            # is interpolated too; with "+" the leading %s was left verbatim
            # in the message because "%" binds tighter than "+"
            raise VarargsError(
                "gave up looking for needed varargs argument for %s between "
                "%s..%s" % (callee, ida.atoa(ea), ida.atoa(start_ea))
            )

        if ea == start_ea:
            ea = ida.prev_head(ea)
            continue  # skip the call insn
        elif list(ida.code_refs_from(ea, 0)):
            raise VarargsError(
                "encountered branch/jump while looking for varargs argument "
                + "between %s..%s" % (ida.atoa(ea), ida.atoa(start_ea))
            )

        rd = ida.get_op(ea, 0)
        if rd.val == wanted_reg:
            opvals = ida.get_opvals(ea)  # XXX should try to track stkvar values
            s = ida.get_string(opvals[-1].target)
            # keep searching when the target is not a string
            if s is not None:
                return s

        ea = ida.prev_head(ea)
        distance += 1
Exemple #5
0
def get_callee(ea, mnem, args):
    """ea_t -> str -> formatter_args -> str

    Return the name of the function called by the call instruction at ea.

    For "jal" the name is taken from args["rs"].  For "jalr" the target is
    read from the "$ mips" netnode's altval for ea, minus one.  Raises
    FunctionPointerError when that yields -1, and utils.BugError for any
    other mnemonic.
    """
    # XXX we don't handle function pointers yet, but it should be easy now that
    # we handle all internal functions as having the same signature...i think
    if mnem == "jal":
        return args["rs"].name

    if mnem != "jalr":
        raise utils.BugError("unhandled call insn: %s" % mnem)

    # NOTE(review): presumably the stored altval is target + 1, so that a
    # zero altval means "no target recorded" -- confirm
    target = ida.netnode("$ mips").altval(ea) - 1
    if target == -1:
        raise FunctionPointerError("unknown target for function pointer at %s" % ida.atoa(ea))
    return ida.name(target)
Exemple #6
0
def get_callee(ea, mnem, args):
    '''ea_t -> str -> formatter_args -> str

    Resolve the callee name for the call instruction at ea.

    'jalr': look up the altval stored for ea in the '$ mips' netnode, subtract
    one, and name the resulting address; FunctionPointerError is raised when
    the result is -1.  'jal': use the name carried by args['rs'].  Anything
    else raises utils.BugError.
    '''
    # XXX we don't handle function pointers yet, but it should be easy now that
    # we handle all internal functions as having the same signature...i think
    if mnem == 'jalr':
        node = ida.netnode('$ mips')
        callee_ea = node.altval(ea) - 1
        if callee_ea != -1:
            callee_name = ida.name(callee_ea)
        else:
            raise FunctionPointerError('unknown target for function pointer at %s' % ida.atoa(ea))
    elif mnem == 'jal':
        callee_name = args['rs'].name
    else:
        raise utils.BugError('unhandled call insn: %s' % mnem)

    return callee_name
Exemple #7
0
def get_one_item(ea, ti, sz):
    '''ea_t -> tinfo_t -> int -> c.types obj | int | long?

    Read one data item at ea.  Float and double types (as reported by ti) are
    read with ida.get_float / ida.get_double and wrapped with ep_ct.cfloat /
    ep_ct.cdouble.  Anything else is read with the ida accessor matching sz
    (1, 2, 4 or 8).

    Raises UnknownDataTypeError when sz is not one of those sizes.
    '''
    if ti.is_float():
        return ep_ct.cfloat(ida.get_float(ea))

    if ti.is_double():
        return ep_ct.cdouble(ida.get_double(ea))

    try:
        # NOTE this gets the SOURCE'S size for a given type, which may not
        # match the target's
        readers = {1: ida.byte, 2: ida.word, 4: ida.dword, 8: ida.qword}
        read = readers[sz]
    except KeyError:
        raise UnknownDataTypeError('unknown data type at %s' % ida.atoa(ea))

    return read(ea)
Exemple #8
0
def get_one_item(ea, ti, sz):
    '''ea_t -> tinfo_t -> int -> c.types obj | int | long?

    Fetch a single item from address ea.  When ti reports a float or double,
    the value comes from ida.get_float / ida.get_double and is wrapped by
    ep_ct.cfloat / ep_ct.cdouble.  Otherwise sz selects one of ida.byte,
    ida.word, ida.dword or ida.qword, and that accessor's result is returned.

    Raises UnknownDataTypeError for a size other than 1, 2, 4 or 8.
    '''
    if ti.is_float():
        item = ep_ct.cfloat(ida.get_float(ea))
    elif ti.is_double():
        item = ep_ct.cdouble(ida.get_double(ea))
    else:
        try:
            # NOTE this gets the SOURCE'S size for a given type, which may not
            # match the target's
            accessor = {
                1 : ida.byte,
                2 : ida.word,
                4 : ida.dword,
                8 : ida.qword,
            }[sz]
        except KeyError:
            raise UnknownDataTypeError('unknown data type at %s' % ida.atoa(ea))
        item = accessor(ea)

    return item
Exemple #9
0
def get_callee(ea, mnem, args):
    '''ea_t -> str -> formatter_args -> str

    Name the function targeted by the call instruction at ea.

    A 'jalr' target comes from the '$ mips' netnode (altval for ea, minus
    one); a result of -1 raises FunctionPointerError.  A 'jal' target is the
    name attribute of args['rs'].  Other mnemonics raise utils.BugError.
    '''
    # XXX we don't handle function pointers yet, but it should be easy now that
    # we handle all internal functions as having the same signature...i think
    if mnem == 'jalr':
        fun = ida.netnode('$ mips').altval(ea) - 1
        if fun == -1:
            where = ida.atoa(ea)
            raise FunctionPointerError(
                'unknown target for function pointer at %s' % where)
        return ida.name(fun)

    if mnem == 'jal':
        return args['rs'].name

    raise utils.BugError('unhandled call insn: %s' % mnem)
Exemple #10
0
def do_switch_or_return(ea):
    '''ea_t -> c_ast.Return | c_ast.Switch

    Build the C AST node for the instruction at ea.  A return instruction
    becomes an empty `return`.  Otherwise ea is treated as a switch and
    becomes a `switch` whose cases (and default) each `goto` the name of
    their target.

    Raises utils.BugError when ea is neither a return nor has switch cases.
    '''
    if ida.is_ret_insn(ea):
        return c_ast.Return(None)

    # not a return, so it has to be a switch
    try:
        sw = ida.switch_cases(ea)
    except ida.NoSwitchError:
        raise utils.BugError('unhandled jr at ea %s' % ida.atoa(ea))

    default_goto = c_ast.Goto(ida.name(sw.default))
    default_stmts = [c_ast.Default([default_goto])]

    case_stmts = []
    for (addr, loc) in sw.cases.iteritems():
        case_value = c_ast.Constant('int', str(addr))
        case_goto = c_ast.Goto(ida.name(loc))
        case_stmts.append(c_ast.Case(case_value, [case_goto]))

    # the expression being switched on comes from the insn found by get_swval
    (mnem, opnd, opn) = data.get_swval(ea)
    switch_expr = fmt_op(opnd, mnem, opn)
    body = c_ast.Compound(case_stmts + default_stmts)
    return c_ast.Switch(switch_expr, body)
Exemple #11
0
def fmt_insn(ea, our_fns, extern_reg_map, stkvars, from_delay):
    '''ea_t -> frozenset(str) -> {str : reg_sig} -> {int : c_ast()} -> (ea_t, str)

    Translate the instruction at ea into C AST statement(s) and report which
    address should be processed next.

    ea             -- address of the instruction to translate
    our_fns        -- names of functions emitted as internal calls
    extern_reg_map -- callee name -> signature info used for external calls
    stkvars        -- passed through to ida.get_opvals and to recursive calls
    from_delay     -- True when translating a delay-slot instruction on behalf
                      of the preceding delayed instruction; labels are not
                      emitted in that case

    Returns the result of next_ea_and_c(next address, [statements]).
    NOTE(review): the signature line above omits from_delay and gives the
    second element as str, but the recursive call below uses it as a list of
    statements -- confirm against next_ea_and_c.

    Raises utils.BugError for an unhandled 'usefn' instruction.
    '''
    # XXX this function is too long and its interaction with the formatter steps
    # is not very clear
    # NOTE mutation in a few places
    #
    # we cannot rely simply on IDA's disassembly when generating C.  e.g.:
    #
    # .text:100052F4                 lwc1    $f12, (square - 0x10008E50)($s1)
    #
    # THIS means f12 = square[0] (square is declared as an array).  but...
    #
    # .text:100041A4                 lw      $a1, (seqList - 0x1000BF78)($a1)
    #
    # THIS means a1 = seqList--NOT *seqList or seqList[0].  GetOperand and
    # similar functions are thus useless for our purposes.  unfortunately, we
    # have no choice but to handle C's type system in order to emit C from
    # disassembly.  we don't COMPLETELY handle it (patches welcome!!!1), but we
    # do achieve enough that with minor database annotations we have a POC in
    # our chosen target.

    # wrap stmt in a C label named after ea, unless we are formatting a delay
    # slot (from_delay) or ea has no name, in which case stmt is returned as is
    def labelize(ea, stmt):
        if from_delay is False:
            label = ida.name(ea)
            if label != '':
                return c_ast.Label(label, stmt)
            else:
                return stmt
        else:
            return stmt

    fn = ida.get_func(ea)
    # NOTE(review): fn_name is not used anywhere below in this function
    fn_name = ida.get_func_name(ea)
    fn_end = fn.endEA
    mnem = ida.get_mnem(ea)
    insn = insns.insns[mnem]
    is_delayed = mnem in insns.delayed
    # the instruction right after ea; for a delayed insn this is its delay slot
    delay_ea = ida.next_head(ea, fn_end)
    # a delayed insn consumes its delay slot too, so resume one insn further on
    next_ea = (delay_ea
               if is_delayed is False
               else ida.next_head(delay_ea, fn_end))

    if ida.is_switch_insn(ea) is True:
        # don't emit stmts that IDA marks as being part of a switch idiom
        #
        # pass delay_ea as the next ea to check, because we may have a case in
        # which a non-switch insn follows a delayed switch insn
        return next_ea_and_c(delay_ea, [labelize(ea, c_ast.EmptyStatement())])

    opvals = ida.get_opvals(ea, stkvars)

    # addiu has many forms, some of which require transformation into
    # two-operand statements, others which need to be kept as three-operand
    # statements, so we have to handle it here, not fmt_op
    #
    # we can elide a previous modification to that register within a basic
    # block if it has no uses between a modification and the addiu, though we
    # don't yet do this
    if mnem == 'addiu' and opvals[-1].ty != ida.op_ty.value:
        # handles cases where addiu is effectively an assignment (e.g. when
        # used for address calculation)

        # first op is always a register
        reg = fmt_op(opvals[0], mnem, 0)
        # any non-number as the final operand should be handled according to
        # fmt_op's usual rules
        arg = fmt_op(opvals[-1], mnem)
        assign = labelize(ea, ep_ct.do_assign(rt=reg, op=ep_ct.cast_to_dest_reg(insn, arg)))
        return next_ea_and_c(next_ea, [assign])

    if mnem == 'trunc.w.d':
        # emulate trunc.w.d with our function
        vals = [fmt_reg(mnem, opvals[0].val, insn.result), fmt_reg(mnem, opvals[1].val, insn.slots[0])]
        return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(insn.subst, ret_reg=vals[0], args=ep_ct.args_for_call([vals[1]])))])
    elif mnem in ['jalr', 'jr']:
        # jalr and jr need special handling
        vals = []
    else:
        # format every operand, passing its position so fmt_op knows which
        # operand slot it is formatting
        vals = list(fmt_op(x, mnem, op) for (op, x) in enumerate(opvals))

    if insn.ty == insns.types.usefn:
        # 'usefn' insns are emitted as a call to insn.subst rather than going
        # through a formatter; only the memcpy substitution is handled here
        if insn.subst == 'memcpy':
            # this should be redesigned to not use memcpy just to make the
            # generated code a little nicer, but the complexity hasn't been
            # worth it.  the issue is: the fact that [ls][dw]c1 move data
            # between the fpu and memory is no guarantee that the data held in
            # an fpu register is actually a float or a double, which complicates
            # the logic a little bit.  fortunately, we can just use memcpy
            # instead, and modern compilers will inline it so that it's
            # equivalent to a load/store for small sizes.
            if mnem in ['ldc1', 'sdc1']:
                size = 8
            elif mnem in ['lwc1', 'swc1']:
                size = 4
            else:
                raise utils.BugError('unhandled usefn insn %s' % mnem)

            # need to swap the order of arguments for a store, since loads and
            # stores are written in the same direction, but they aren't in C!
            args = list(reversed(vals) if mnem.startswith('s') else vals)
            return next_ea_and_c(next_ea, [labelize(ea, ep_ct.make_call(insn.subst, args=ep_ct.args_for_call([args[0], args[1], c_ast.Constant('int', str(size))])))])
        else:
            raise utils.BugError('unhandled usefn instruction %s' % mnem)
    else:
        # keyword arguments for the formatter looked up via get_formatter below
        args = make_args_for_formatter(insn, vals)

    if is_delayed is True:
        # format our delayed instruction before processing this instruction--but
        # see below for an important note about the case of branch likely
        (_, delay_slot) = fmt_insn(delay_ea, our_fns, extern_reg_map, stkvars, from_delay=True)
        # branch target
        loc = opvals[-1].val

        if mnem == 'jr':
            delayed = do_switch_or_return(ea)
        elif insn.ty == insns.types.call:
            callee = data.get_callee(ea, mnem, args)

            if callee in our_fns:
                delayed = ep_ct.internal_call(callee)
            else:  # external function call
                try:
                    sig = extern_reg_map[callee]
                except KeyError:
                    # XXX we should really modify pycparser to allow insertion
                    # of comments, as it would make the emitted code much easier
                    # to follow.  just alert the user that we couldn't make some
                    # calls for now
                    print ('/* %s: no regmap info, emitting empty stmt at %s */' % (callee, ida.atoa(ea)))
                    delayed = ep_ct.do_nop()
                else:
                    delayed = extern_call(callee, sig, mnem, ea)
        else:  # some other sort of delayed insn
            delayed = get_formatter(mnem)(**args)

        goto = c_ast.Goto(loc)
        # NOTE the branches below mutate `delayed` in place by assigning its
        # iftrue/iffalse attributes
        if insns.subtypes.likely in insn.subty:
            # for branch likely, the delay slot is NOT executed if the branch is
            # not taken
            delayed.iftrue = c_ast.Compound(delay_slot + [goto, labelize(delay_ea, delay_slot[0])])
            ret = labelize(ea, delayed)
        else:
            if insn.ty in [insns.types.branch, insns.types.fbranch]:
                # ordinary branch: the delay slot is emitted on both paths,
                # before the goto when taken and as the else arm when not
                delayed.iftrue = c_ast.Compound(delay_slot + [goto])
                delayed.iffalse = labelize(delay_ea, delay_slot[0])
                ret = labelize(ea, delayed)
            else:
                # non-branch delayed insn: emit the delay slot first, then the
                # delayed statement itself
                ret = labelize(ea, c_ast.Compound([labelize(delay_ea, delay_slot[0]), delayed]))
        return next_ea_and_c(next_ea, [ret])

    # plain, non-delayed instruction: hand the formatted operands to the
    # formatter registered for this mnemonic
    return next_ea_and_c(next_ea, [labelize(ea, get_formatter(mnem)(**args))])