Beispiel #1
0
def evaluate_lines(instr, lines, in_str):
    """Emulate the lines of a basic block and resolve the final ``rip``.

    Every element of ``lines`` is run through its ``amoco`` emulation on a
    newly created ``mapper``; the value stored for ``env.rip`` afterwards is
    passed to ``evaluate``.

    Returns a 2-tuple.  On success it is ``(msg, val)`` as produced by
    ``evaluate``.  On failure the second item is ``[None]`` and the first
    item is a triple ``(reason, culprit, dump)`` where ``culprit`` is the
    line that could not be emulated (or the unresolved ``rip`` value) and
    ``dump`` is the sorted list of text lines of ``str(mapper)``.
    """
    state = mapper()

    def dump_state():
        # Sorted so that two dumps can be compared line by line.
        return sorted(str(state).split("\n"))

    def failure(reason, culprit):
        # Common shape of every unsuccessful result.
        return ((reason, culprit, dump_state()), [None])

    # Emulation with amoco is too slow:
    # - for 'movsd', tests depending on 'df' are not simplified, therefore
    #   there are too many nested tests;
    # - for 'cmovXX', tests are not simplified either.
    # The block is abandoned at the third instruction of either family.
    slow_seen = {'movsd': 0, 'cmov': 0}
    slow_reason = {'movsd': 'Emulated too slow (movsd)',
                   'cmov': 'Emulated too slow (cmov)'}

    for insn in lines:
        opname = insn.opname

        # rip is kept symbolic
        if opname in ('call', 'jmp'):
            state[env.rip] = env.rip
        else:
            state[env.rip] = env.rip - insn.bytelen

        if opname == 'movsd':
            slow_kind = 'movsd'
        elif opname.startswith('cmov'):
            slow_kind = 'cmov'
        else:
            slow_kind = None
        if slow_kind is not None:
            slow_seen[slow_kind] += 1
            if slow_seen[slow_kind] >= 3:
                return failure(slow_reason[slow_kind], insn)

        try:
            log.debug("EVAL %-20r %s", insn.amoco.bytes, insn)
            insn.amoco(state)
        except NotImplementedError:
            return failure('Not implemented', insn)
        except NameError:
            return failure('Cannot be emulated (name)', insn)
        except amoco.arch.core.InstructionError:
            return failure('Cannot be emulated', insn)
        except TypeError:
            return failure('Not callable', insn)

        if opname == 'call':
            # amoco emulation pushes rip+i.length;
            # we prefer to push the label of the next basic block
            next_label = instr.symbols.find_symbol(
                section=insn.section,
                address=insn.offset + insn.amoco.length)
            pushed = expressions.lab(next_label, size=cpu_addrsize)
            state[env.mem(env.rsp, cpu_addrsize)] = pushed

    retval = state[env.rip]
    msg, val = evaluate(retval, state, instr.symbols.find_symbols, instr,
                        in_str)
    if val is None:
        return failure(str(retval.__class__), retval)
    if val == [None]:
        return failure(msg, retval)
    return (msg, val)
Beispiel #2
0
def remove_pic_offset(e, pool):
    """Rewrite a PIC/GOT-relative expression in terms of the target label.

    ``e`` is inspected recursively:

    * a test expression has both operands rewritten and is rebuilt with
      ``env.tst``;
    * nested memory accesses are peeled one or two levels at a time and
      rebuilt around the rewritten inner expression;
    * a memory access whose displacement is a label ending in
      ``@GOTPCREL`` becomes ``env.lab`` of the symbol found in ``pool``
      under the name without that suffix.

    Returns the rewritten expression, or ``None`` when ``e`` (or any
    sub-expression that had to be rewritten) does not match one of the
    shapes above.
    """
    log.debug("DETECT PIC FROM %s:%s", e.__class__.__name__, e)

    if e._is_tst:
        rewritten_l = remove_pic_offset(e.l, pool)
        rewritten_r = remove_pic_offset(e.r, pool)
        if rewritten_l is None or rewritten_r is None:
            return None
        return env.tst(e.tst, rewritten_l, rewritten_r)

    # Everything below only applies to memory accesses.
    if not e._is_mem:
        return None

    addr = e.a
    if addr.base._is_mem:
        inner = addr.base
        if inner.a.disp == 0 and inner.a.base._is_mem:
            # M32[M32[M32[PIC_OFFSET+toto@GOT]]+cte]
            # => M32[M32[toto]+cte]
            label = remove_pic_offset(inner.a.base, pool)
            if label is None:
                return None
            return env.mem(env.mem(label), disp=addr.disp)
        # M32[M32[PIC_OFFSET+toto@GOTPCREL]+cte]
        # => M32[toto+cte]
        label = remove_pic_offset(inner, pool)
        if label is None:
            return None
        return env.mem(label, disp=addr.disp)

    if not hasattr(addr.disp, '_is_lab'):
        log.debug("BASE %s; DISP %s; TODO", addr.base, addr.disp)
        return None

    # M32[PIC_OFFSET+toto@GOTPCREL]
    # => toto
    suffix = '@GOTPCREL'
    disp = addr.disp
    if disp._is_lab and disp.ref.name.endswith(suffix):
        label_name = disp.ref.name[:-len(suffix)]
        pic_data = addr.base
        if not check_pic_data(pic_data):
            # Bare name reference kept as in the original: it raises
            # NameError here unless the name is defined elsewhere.
            # NOTE(review): looks like a marker for a path that the
            # non-regression tests never reach -- confirm.
            NON_REGRESSION_FOUND
            log.debug("PIC OFFSET [%s] LABEL %s", pic_data, label_name)
            return None
        return env.lab(pool.find_symbol(name=label_name), size=cpu_addrsize)

    return None
Beispiel #3
0
def getModRM(obj, Mod, RM, data, REX=None):
    """Decode the operand selected by the Mod and RM fields.

    Args:
        obj: instruction being decoded.  ``obj.misc`` supplies 'opdsz'
            (default 32), 'adrsz' (default 64) and 'segreg'; any SIB and
            displacement bytes consumed here are appended to ``obj.bytes``.
        Mod: value of the mod field.
        RM: value of the r/m field.
        data: remaining undecoded bits; it is sliced with bit offsets and
            ``data.size`` is compared against bit counts.
        REX: optional ``(W, R, X, B)`` tuple.  When falsy, the tuple is
            obtained from ``getREX(obj)``.

    Returns:
        ``(operand, data)``: the register (``Mod == 0b11``) or the
        ``env.mem`` expression that was decoded, and the bits left over.

    Raises:
        InstructionError: if ``data`` is too short for the SIB byte or the
            displacement that the encoding requires.
    """
    opdsz = obj.misc['opdsz'] or 32
    adrsz = obj.misc['adrsz'] or 64
    seg = obj.misc['segreg']
    if seg is None:
        seg = ''
    W, R, X, B = REX or getREX(obj)
    if opdsz != 8 and W == 1:
        opdsz = 64
    # r/16/32/64 case:
    if Mod == 0b11:
        op1 = getregB(obj, RM, opdsz)
        return op1, data
    # SIB cases :
    if adrsz != 16 and RM == 0b100:
        # read SIB byte in data:
        if data.size < 8:
            raise InstructionError(obj)
        sib, data = data[0:8], data[8:data.size]
        # add sib byte:
        obj.bytes += pack(sib)
        # decode base & scaled index
        b = env.getreg((B << 3) + sib[0:3].int(), adrsz)
        i = env.getreg((X << 3) + sib[3:6].int(), adrsz)
        ss = 1 << (sib[6:8].int())
        # s stays the plain int 0 when the index field designates the
        # stack pointer, i.e. when there is no scaled index.
        s = i * ss if i.ref not in ('rsp', 'esp', 'sp') else 0
    else:
        s = 0
        if adrsz != 16:
            b = env.getreg((B << 3) + RM, adrsz)
        else:
            b = (env.bx + env.si, env.bx + env.di, env.bp + env.si,
                 env.bp + env.di, env.si, env.di, env.bp, env.bx)[RM]
    # check special disp32 case (RIP-relative addressing):
    if Mod == 0:
        if RM == 0b101:
            b = env.rip
            # Only default to cs when no segment was set.  The type guard
            # keeps the comparison away from any overloaded __eq__ on a
            # segment register object; identity with a str literal (the
            # previous test) is implementation-dependent.
            if isinstance(seg, str) and seg == '':
                seg = env.cs
            Mod = 0b10
        elif b.ref in ('rbp', 'r13'):
            # No base register: only the (optional) scaled index remains
            # and a full-size displacement follows.
            b = s + env.cst(0, adrsz)
            s = 0
            Mod = 0b10
    # "No scaled index" is the plain int 0; anything else is an expression.
    # The isinstance guard avoids calling == on such an expression.
    if isinstance(s, int) and s == 0:
        bs = b
    elif env.internals.get('keep_order'):
        # Instead of doing bs = b+s, which will reorder arguments, we do
        # the addition manually, and change 'prop' so the many future calls
        # to 'simplify' do not reorder the arguments
        bs = env.op('+', b, s)
        bs.prop |= 16
    else:
        bs = b + s
    # now read displacement bytes:
    if Mod == 0b00:
        d = 0
    elif Mod == 0b01:
        if data.size < 8:
            raise InstructionError(obj)
        d = data[0:8]
        data = data[8:data.size]
        obj.bytes += pack(d)
        d = d.signextend(adrsz).int(-1)
    elif Mod == 0b10:
        # A 64-bit address size still uses a 32-bit displacement.
        immsz = adrsz
        if immsz == 64:
            immsz = 32
        if data.size < immsz:
            raise InstructionError(obj)
        d = data[0:immsz]
        obj.bytes += pack(d)
        data = data[immsz:data.size]
        d = d.int(-1)
    # A constant-zero base absorbs the displacement.
    if bs._is_cst and bs.v == 0x0:
        bs.v = d
        bs.size = adrsz
        d = 0
    # 'mm' is a symbolic operand size; the memory operand is 64 bits wide.
    if opdsz == 'mm':
        opdsz = 64
    return env.mem(bs, opdsz, seg, d), data
Beispiel #4
0
def getModRM(obj, Mod, RM, data):
    """Decode the operand selected by the Mod and RM fields.

    Args:
        obj: instruction being decoded.  ``obj.misc`` supplies 'opdsz'
            (default 32), 'adrsz' (default 64), 'segreg' and 'REX' (a
            ``(W, R, X, B)`` tuple or ``None``); any SIB and displacement
            bytes consumed here are appended to ``obj.bytes``.
        Mod: value of the mod field.
        RM: value of the r/m field.
        data: remaining undecoded bits; it is sliced with bit offsets and
            ``data.size`` is compared against bit counts.

    Returns:
        ``(operand, data)``: the register (``Mod == 0b11``) or the
        ``env.mem`` expression that was decoded, and the bits left over.

    Raises:
        InstructionError: if ``data`` is too short for the SIB byte or the
            displacement that the encoding requires.
    """
    opdsz = obj.misc['opdsz'] or 32
    adrsz = obj.misc['adrsz'] or 64
    seg = obj.misc['segreg']
    if seg is None:
        seg = ''
    REX = obj.misc['REX']
    if REX is None:
        W = R = X = B = 0
    else:
        W, R, X, B = REX
        if W == 1:
            opdsz = 64
    # r/16/32 case:
    if Mod == 0b11:
        op1 = env.getreg((B << 3) + RM, opdsz)
        return op1, data
    # m/16/32 case:
    if adrsz != 16 and RM == 0b100:
        # read SIB byte in data:
        if data.size < 8:
            raise InstructionError(obj)
        sib, data = data[0:8], data[8:data.size]
        # add sib byte:
        obj.bytes += pack(sib)
        # decode base & scaled index
        b = env.getreg((B << 3) + sib[0:3].int(), adrsz)
        i = env.getreg((X << 3) + sib[3:6].int(), adrsz)
        ss = 1 << (sib[6:8].int())
        # s stays the plain int 0 when the index field designates the
        # stack pointer, i.e. when there is no scaled index.
        s = i * ss if i.ref not in ('rsp', 'esp', 'sp') else 0
    else:
        s = 0
        if adrsz != 16:
            b = env.getreg((B << 3) + RM, adrsz)
        else:
            b = (env.bx + env.si,
                 env.bx + env.di,
                 env.bp + env.si,
                 env.bp + env.di,
                 env.si,
                 env.di,
                 env.bp,
                 env.bx)[RM]

    # check [disp16/32] case:
    # The register tests below are identity tests on the env objects,
    # kept as in the original.
    # NOTE(review): this test also fires when rbp/r13 was decoded from a
    # SIB base field; in the x86-64 encoding that form means "disp32 with
    # no base register" rather than RIP-relative -- confirm and fix
    # separately.
    if (b is env.rbp or b is env.r13) and Mod == 0:
        b = env.rip
        # Only default to cs when no segment was set.  The type guard keeps
        # the comparison away from any overloaded __eq__ on a segment
        # register object; identity with a str literal (the previous test)
        # is implementation-dependent.
        if isinstance(seg, str) and seg == '':
            seg = env.cs
        Mod = 0b10
    if (b is env.bp) and Mod == 0:
        b = env.cst(0, adrsz)
        Mod = 0b10
    # now read displacement bytes:
    if Mod == 0b00:
        d = 0
    elif Mod == 0b01:
        if data.size < 8:
            raise InstructionError(obj)
        d = data[0:8]
        data = data[8:data.size]
        obj.bytes += pack(d)
        d = d.signextend(adrsz).int(-1)
    elif Mod == 0b10:
        # A 64-bit address size still uses a 32-bit displacement.
        immsz = adrsz
        if immsz == 64:
            immsz = 32
        if data.size < immsz:
            raise InstructionError(obj)
        d = data[0:immsz]
        obj.bytes += pack(d)
        data = data[immsz:data.size]
        d = d.int(-1)
    bs = b + s
    # A constant-zero base absorbs the displacement.
    if bs._is_cst and bs.v == 0x0:
        bs.v = d
        bs.size = adrsz
        d = 0
    return env.mem(bs, opdsz, seg, d), data
Beispiel #5
0
def array_detection(input, machine, find, instr, in_str):
    """Try to interpret ``input`` as the address of an array element and
    collect the labels stored in that array.

    The expression is searched for a ``<something> * constant`` (or
    ``<something> << n``) term in a few fixed positions.  That term is
    replaced IN PLACE inside ``input`` by the external symbol
    ``index_in_array``, and the constant becomes the item length.  The
    resulting expression is then evaluated for offsets 0, item_len,
    2*item_len, ... and each resulting address is dereferenced; scanning
    stops once four offsets have produced no value.

    Args:
        input: address expression; it is mutated (see above).
        machine: mapper whose ``M`` method reads memory expressions.
        find: callable used as ``find(name=...)`` to look up the symbol
            behind a ``@GOTOFF`` label.
        instr: passed to ``deref_table`` / ``expr.get_mem``; its
            ``symbols`` attribute is passed to ``remove_got``.
        in_str: passed through to ``deref_table``.

    Returns:
        ``(msg, val)`` where
        * ``('MEM_EXP - NOT AN ARRAY', [None])`` if no item length was found,
        * ``(msg, 'TABLE')`` as soon as ``deref_table`` answers ``'TABLE'``,
        * ``('MEM_EXP', [None])`` if no label was collected,
        * ``('ARRAY', labels)`` otherwise, ``labels`` having no duplicates.
    """
    log.debug("ARRAY_DETECT %s\n\t%s", input.__class__.__name__, input)
    dst_lst = []
    # Is it an element of an array?
    # Find the multiplication, replace it by 'index_in_array'
    index_var = env.ext('index_in_array', size=cpu_addrsize)
    item_len = 0
    if input.op.symbol == '+' and input.l._is_eqn:
        if input.l.op.symbol == '+' and input.l.l._is_eqn and \
           input.l.l.op.symbol == '*' and input.l.l.r._is_cst:
            # ((x * cst) + y) + z
            item_len = int(input.l.l.r)
            input.l.l = index_var
        elif input.l.op.symbol == '+' and input.l.r._is_eqn and \
                input.l.r.op.symbol == '*' and input.l.r.r._is_cst:
            # (y + (x * cst)) + z
            item_len = int(input.l.r.r)
            input.l.r = index_var
        elif input.l.op.symbol == '*' and input.l.r._is_cst:
            # (x * cst) + z
            item_len = int(input.l.r)
            input.l = index_var
        elif input.l.op.symbol == '<<':
            # (x << n) + z
            item_len = 1 << int(input.l.r)
            input.l = index_var
    elif input.op.symbol == '+' and input.r._is_eqn:
        if input.r.op.symbol == '*' and input.r.r._is_cst:
            # z + (x * cst)
            item_len = int(input.r.r)
            if input.r.l._is_ptr and input.r.l.disp == 0 and \
               input.r.l.base._is_eqn and input.r.l.base.op.symbol == '+' and \
               input.r.l.base.r._is_eqn and input.r.l.base.r.op.symbol == '*' \
               and input.r.l.base.r.r._is_cst \
               and input.r.l.base.l == input.r.l.base.r.l:
                # x is itself a pointer to (a + a * cst2): fold the inner
                # factor into the item length.
                # Bare name reference kept as in the original: it raises
                # NameError here unless the name is defined elsewhere.
                NON_REGRESSION_FOUND
                item_len *= 1 + int(input.r.l.base.r.r)
            input.r = index_var
        elif input.r.op.symbol == '<<':
            # z + (x << n)
            NON_REGRESSION_FOUND
            item_len = 1 << int(input.r.r)
            input.r = index_var
    if item_len == 0:
        msg = 'MEM_EXP - NOT AN ARRAY'
        return msg, [None]
    log.debug("    ARRAY of %d-byte items", item_len)
    # Usually 4-byte items
    invalid_indexes = 0
    index_in_array = -item_len
    while invalid_indexes < 4:
        index_in_array += item_len
        m2 = mapper()
        m2[index_var] = env.cst(index_in_array, size=cpu_addrsize)
        address_in_array = input.eval(m2)
        log.debug("    x[%d] at %s:%s", index_in_array // item_len,
                  address_in_array.__class__.__name__, address_in_array)
        msg, val = 'NOT FOUND', None
        table, offset = expr.get_lab_imm(address_in_array)
        if offset is not None:
            msg, val = deref_table(table, offset, instr, in_str)
        if val is None:
            # The flag lives on the mapper class, so it must be reset even
            # if the memory read raises; otherwise every later mapper
            # operation would keep running with aliasing checks disabled.
            mapper.assume_no_aliasing = True
            try:
                offset = machine.M(env.mem(address_in_array))
            finally:
                mapper.assume_no_aliasing = False
            offset = remove_got(offset, instr.symbols)
            v = expr.get_lab(offset)
            if v:
                msg, val = 'MEM', [v]
            # A table dereference, when available, overrides the plain
            # label found just above.
            table, offset = expr.get_lab_imm(expr.get_mem(offset, instr))
            if offset is not None:
                msg, val = deref_table(table, offset, instr, in_str)
        if val == 'TABLE':
            return msg, val
        if val in (None, [None]):
            log.debug("        ----> %s", msg)
            invalid_indexes += 1
            continue
        for label in val:
            if label.name.endswith('@GOTOFF'):
                NON_REGRESSION_FOUND
                # to make this work also with executables, we will need to
                # change our API and get the offset value that will have to
                # be subtracted; removing @GOTOFF is not enough!
                label = find(name=label.name[:-7])[0]
            log.debug("        => %s", label)
            if label not in dst_lst:
                dst_lst.append(label)
    if not dst_lst:
        return 'MEM_EXP', [None]
    return 'ARRAY', dst_lst