def x86_arpl(ctx, i):
    """Translate x86 ARPL (adjust the RPL field of a segment selector).

    Operand 0 is the destination selector, operand 1 the source selector.
    When the destination RPL is lower than the source RPL, ZF is set and the
    destination receives the source RPL bits; otherwise ZF is cleared and
    the destination is left untouched.
    """
    dest_seg = operand.get(ctx, i, 0)
    src_seg = operand.get(ctx, i, 1)

    dest_rpl = ctx.tmp(16)
    src_rpl = ctx.tmp(16)
    tmp0 = ctx.tmp(32)
    tmp1 = ctx.tmp(8)
    result_seg = ctx.tmp(16)
    tmp2 = ctx.tmp(16)

    # NOTE(review): the RPL is taken from bits 14-15 here (and in the masks
    # below); the Intel SDM places the RPL in bits 0-1 of a selector --
    # confirm the intended selector layout.
    ctx.emit(lshr_(dest_seg, imm(14, 8), dest_rpl))
    ctx.emit(lshr_(src_seg, imm(14, 8), src_rpl))

    # tmp1 = 1 when (dest_rpl - src_rpl) is not negative, i.e. nothing to do
    ctx.emit(sub_(dest_rpl, src_rpl, tmp0))
    ctx.emit(and_(tmp0, imm(sign_bit(32), 32), tmp0))
    ctx.emit(bisz_(tmp0, tmp1))
    ctx.emit(jcc_(tmp1, 'check_passed'))

    # dest_rpl < src_rpl: raise the destination RPL to the source RPL
    ctx.emit(str_(imm(1, 8), r('zf', 8)))
    ctx.emit(and_(dest_seg, imm(0b0011111111111111, 16), result_seg))
    ctx.emit(and_(src_seg, imm(0b1100000000000000, 16), tmp2))
    ctx.emit(or_(result_seg, tmp2, result_seg))
    operand.set(ctx, i, 0, result_seg)
    ctx.emit(jcc_(imm(1, 8), 'done'))

    ctx.emit('check_passed')
    ctx.emit(str_(imm(0, 8), r('zf', 8)))

    ctx.emit('done')
    ctx.emit(nop_())
def x86_pminu(ctx, i, size):
    """Translate a packed-minimum instruction over `size`-bit lanes.

    Both sources are split into lanes with `unpack`; for every lane pair the
    smaller value is selected arithmetically (no branches), and the lanes
    are re-assembled with `pack` into the destination operand.
    """
    emit = ctx.emit
    a_id, b_id, dst_id = vex_opnds(i)

    lhs = operand.get(ctx, i, a_id)
    rhs = operand.get(ctx, i, b_id)
    lhs_lanes = unpack(ctx, lhs, size)
    rhs_lanes = unpack(ctx, rhs, size)

    wide = size * 2
    acc = ctx.tmp(wide)
    pick = ctx.tmp(wide)

    out_lanes = []
    for lhs_lane, rhs_lane in zip(lhs_lanes, rhs_lanes):
        lane = ctx.tmp(size)

        # pick = 1 when the sign bit of (lhs - rhs) is clear
        emit(sub_(lhs_lane, rhs_lane, acc))
        emit(and_(acc, imm(sign_bit(wide), wide), acc))
        emit(bisz_(acc, pick))

        # acc = rhs * pick + lhs * (pick ^ 1)
        emit(mul_(rhs_lane, pick, acc))
        emit(xor_(pick, imm(1, wide), pick))
        emit(mul_(lhs_lane, pick, pick))
        emit(add_(acc, pick, acc))

        emit(str_(acc, lane))
        out_lanes.append(lane)

    operand.set(ctx, i, dst_id, pack(ctx, out_lanes))
def x86_bextr(ctx, i):
    """Translate x86 BEXTR (bit field extract).

    Operand 1 is the source value; operand 2 carries the start index in its
    low byte and the field length in the next byte. The extracted field is
    written to operand 0. ZF is set from the result, CF is cleared and
    AF/SF/PF are marked undefined.
    """
    a = operand.get(ctx, i, 1)
    b = operand.get(ctx, i, 2)

    start = ctx.tmp(8)
    length = ctx.tmp(8)
    # Named `bitmask` so the module-level `mask()` helper used below is not
    # shadowed by a local temporary.
    bitmask = ctx.tmp(a.size)
    tmp0 = ctx.tmp(8)
    result = ctx.tmp(a.size)

    ctx.emit(str_(b, start))
    ctx.emit(lshr_(b, imm(8, 8), length))

    # NOTE(review): the shift sequence below is kept as written; it does not
    # obviously produce a mask aligned at `start` -- verify against the
    # BEXTR definition before relying on it.

    # we are masking off [11111[start + length , start]111111]
    ctx.emit(sub_(imm(a.size, a.size), length, tmp0))
    ctx.emit(lshr_(imm(mask(a.size), a.size), tmp0, bitmask))
    # [[start + length, start]111111]
    ctx.emit(add_(tmp0, start, tmp0))
    ctx.emit(lshl_(bitmask, tmp0, bitmask))
    # [000000000000[start + length, start]]
    ctx.emit(lshr_(bitmask, start, bitmask))
    # we have our mask [00000[start + length , start]000000]

    ctx.emit(and_(a, bitmask, result))
    ctx.emit(lshr_(result, start, result))

    set_zf(ctx, result)

    ctx.emit(str_(imm(0, 8), r('cf', 8)))
    ctx.emit(undef_(r('af', 8)))
    ctx.emit(undef_(r('sf', 8)))
    ctx.emit(undef_(r('pf', 8)))

    operand.set(ctx, i, 0, result)
def x86_shrd(ctx, i):
    """Translate x86 SHRD (double-precision shift right).

    Operand 0 is shifted right with bits from operand 1 shifted in from the
    top. The count is operand 2, or `ctx.counter` for the two-operand form.
    """
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    if len(i.operands) == 2:
        c = ctx.counter
    else:
        c = operand.get(ctx, i, 2)

    size = a.size
    max_shift = size - 1

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(size * 2)
    result = ctx.tmp(size)

    # the shift amount is truncated at word_size - 1
    ctx.emit(and_(c, imm(max_shift, size), tmp0))

    # make a register double the size of the operands containing b:a; b must
    # move up by the full operand width in bits (not bytes) so that it sits
    # directly above a
    ctx.emit(str_(b, tmp1))
    ctx.emit(lshl_(tmp1, imm(size, 8), tmp1))
    ctx.emit(or_(tmp1, a, tmp1))

    # now shift right by the desired amount
    ctx.emit(lshr_(tmp1, tmp0, tmp1))

    # and truncate into result
    ctx.emit(str_(tmp1, result))

    # TODO: flags properly
    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def x86_movq(ctx, i):
    """Translate x86 MOVQ: copy operand 1 into operand 0.

    The destination is written with `clear=True` and `sign_extend=False`.
    """
    # The destination is fetched but its value is not used; the call is kept
    # because it may have side effects on `ctx` -- verify before removing.
    operand.get(ctx, i, 0)
    source = operand.get(ctx, i, 1)
    operand.set(ctx, i, 0, source, clear=True, sign_extend=False)
def x86_imul(ctx, i):
    """Translate x86 IMUL in its one-, two- and three-operand forms.

    The one-operand form is emitted as an unknown instruction. The other
    forms multiply into a double-width temporary, store it to operand 0 and
    delegate flag handling to `_imul_set_flags`.
    """
    operand_count = len(i.operands)

    if operand_count == 1:
        # single operand form is not modelled
        ctx.emit(unkn_())
        return

    if operand_count == 2:
        # double operand form: operand 0 is both source and destination
        lhs = operand.get(ctx, i, 0)
        rhs = operand.get(ctx, i, 1)
    else:
        # triple operand form: sources are operands 1 and 2
        lhs = operand.get(ctx, i, 1)
        rhs = operand.get(ctx, i, 2)

        if rhs.size < lhs.size:
            # widen the narrower source with sign extension
            narrow = rhs
            rhs = ctx.tmp(lhs.size)
            ctx.emit(sex_(narrow, rhs))

    product = ctx.tmp(lhs.size * 2)
    ctx.emit(mul_(lhs, rhs, product))

    operand.set(ctx, i, 0, product)
    _imul_set_flags(ctx, product)
def x86_pshufd(ctx, i):
    """Translate x86 PSHUFD (shuffle packed doublewords).

    Operand 1 is the 128-bit source and operand 2 the order byte. Each pair
    of bits in the order byte selects which 32-bit source lane is placed in
    the corresponding destination lane; the result is written to operand 0.
    """
    src = operand.get(ctx, i, 1)
    order = operand.get(ctx, i, 2)

    value = imm(0, 128)

    for j in range(0, 4):
        prev_order = order
        order = ctx.tmp(8)
        prev_value = value
        value = ctx.tmp(128)

        selector = ctx.tmp(8)
        bit_offset = ctx.tmp(32)
        shifted_src = ctx.tmp(128)
        lane = ctx.tmp(32)
        lane_wide = ctx.tmp(128)
        placed = ctx.tmp(128)

        # consume the low two bits of the order byte
        ctx.emit(lshr_(prev_order, imm(2, 8), order))
        ctx.emit(and_(prev_order, imm(0b00000011, 8), selector))

        # bring the selected lane down to bit 0 and isolate it
        ctx.emit(mul_(selector, imm(32, 32), bit_offset))
        ctx.emit(lshr_(src, bit_offset, shifted_src))
        ctx.emit(str_(shifted_src, lane))

        # widen again and move it into destination lane j
        ctx.emit(str_(lane, lane_wide))
        ctx.emit(lshl_(lane_wide, imm(j * 32, 8), placed))
        ctx.emit(add_(placed, prev_value, value))

    operand.set(ctx, i, 0, value)
def x86_cmpxchg(ctx, i):
    """Translate x86 CMPXCHG (compare and exchange).

    The accumulator is compared with the destination (operand 0). On a
    match ZF is set and the source (operand 1) is stored to the destination;
    otherwise ZF is cleared and the destination value is loaded into the
    accumulator.
    """
    a = ctx.accumulator
    b = operand.get(ctx, i, 0)
    c = operand.get(ctx, i, 1)

    if b.size != a.size:
        # compare at the destination's width
        prev_a = a
        a = ctx.tmp(b.size)
        ctx.emit(str_(prev_a, a))

    tmp0 = ctx.tmp(8)

    ctx.emit(equ_(a, b, tmp0))
    ctx.emit(jcc_(tmp0, 'equal'))

    ctx.emit('not-equal')
    # On mismatch the accumulator receives the destination value (b), not
    # the source (c).
    ctx.emit(str_(b, ctx.accumulator))
    ctx.emit(str_(imm(0, 8), r('zf', 8)))
    ctx.emit(jcc_(imm(1, 8), 'done'))

    ctx.emit('equal')
    operand.set(ctx, i, 0, c)
    ctx.emit(str_(imm(1, 8), r('zf', 8)))

    ctx.emit('done')
    ctx.emit(nop_())
def x86_bswap(ctx, i):
    """Translate x86 BSWAP: reverse the byte order of operand 0 in place."""
    source = operand.get(ctx, i, 0)

    # split into 8-bit pieces, flip their order, and glue them back together
    octets = unpack(ctx, source, 8)
    octets.reverse()

    operand.set(ctx, i, 0, pack(ctx, octets))
def x86_pcmpgt(ctx, i, size):
    """Translate a packed signed greater-than compare over `size`-bit lanes.

    For each lane pair (a, b) the destination lane becomes all ones when
    a > b as signed values, and zero otherwise. Operand indices come from
    `vex_opnds`.
    """
    a_id, b_id, dst_id = vex_opnds(i)

    a = operand.get(ctx, i, a_id)
    b = operand.get(ctx, i, b_id)
    a_parts = unpack(ctx, a, size)
    b_parts = unpack(ctx, b, size)

    a_sign = ctx.tmp(size)
    a_low = ctx.tmp(size)
    b_sign = ctx.tmp(size)
    b_low = ctx.tmp(size)
    tmp0 = ctx.tmp(size * 2)
    a_low_gt_b_low = ctx.tmp(8)
    tmp1 = ctx.tmp(size)
    a_b_same_sign = ctx.tmp(8)
    a_nonneg = ctx.tmp(8)
    b_neg = ctx.tmp(8)
    a_nonneg_and_b_neg = ctx.tmp(8)
    cond = ctx.tmp(8)

    dst_parts = []
    for a_part, b_part in zip(a_parts, b_parts):
        dst_part = ctx.tmp(size)

        # split each lane into its sign bit and its remaining low bits
        ctx.emit(and_(a_part, imm(sign_bit(size), size), a_sign))
        ctx.emit(and_(a_part, imm(~sign_bit(size), size), a_low))
        ctx.emit(and_(b_part, imm(sign_bit(size), size), b_sign))
        ctx.emit(and_(b_part, imm(~sign_bit(size), size), b_low))

        # a > b <==> (low(a) > low(b) and sign(a) == sign(b))
        #            or (a >= 0 and b < 0)

        # low(a) > low(b): the double-width difference b_low - a_low is
        # negative
        ctx.emit(sub_(b_low, a_low, tmp0))
        ctx.emit(and_(tmp0, imm(sign_bit(size * 2), size * 2), tmp0))
        ctx.emit(bisnz_(tmp0, a_low_gt_b_low))

        # sign(a) == sign(b)
        ctx.emit(xor_(a_sign, b_sign, tmp1))
        ctx.emit(bisz_(tmp1, a_b_same_sign))

        # a >= 0 and b < 0
        ctx.emit(bisz_(a_sign, a_nonneg))
        ctx.emit(bisnz_(b_sign, b_neg))
        ctx.emit(and_(a_nonneg, b_neg, a_nonneg_and_b_neg))

        ctx.emit(and_(a_low_gt_b_low, a_b_same_sign, cond))
        ctx.emit(or_(cond, a_nonneg_and_b_neg, cond))

        # expand the 0/1 condition to an all-zeros / all-ones lane
        ctx.emit(mul_(cond, imm(mask(size), size), dst_part))

        dst_parts.append(dst_part)

    value = pack(ctx, dst_parts)
    operand.set(ctx, i, dst_id, value)
def x86_rol(ctx, i):
    """Translate x86 ROL (rotate left).

    Operand 0 is rotated left by operand 1 (masked to the operand width
    minus one). A zero rotate skips the write-back and leaves flags alone.
    CF receives the low bit of the result; OF is computed only for an
    immediate count of 1 and is undefined otherwise.
    """
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    size = a.size
    max_shift = size - 1

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(8)
    tmp2 = ctx.tmp(size * 2)
    tmp3 = ctx.tmp(size * 2)
    tmp4 = ctx.tmp(size)
    tmp5 = ctx.tmp(size * 2)
    # truncation target, so operand-sized like tmp4
    tmp6 = ctx.tmp(size)
    tmp7 = ctx.tmp(size)
    tmp8 = ctx.tmp(size)
    result = ctx.tmp(size)

    # the rotate amount is truncated at word_size - 1
    ctx.emit(and_(b, imm(max_shift, size), tmp0))

    # zero rotate doesn't affect flags
    ctx.emit(bisz_(tmp0, tmp1))
    ctx.emit(jcc_(tmp1, 'zero_rotate'))

    # zero extend
    ctx.emit(str_(a, tmp2))

    # left shift by the correct amount
    ctx.emit(lshl_(tmp2, tmp0, tmp3))

    # truncate to get first half of result
    ctx.emit(str_(tmp3, tmp4))

    # shift out then truncate to get second half of result
    ctx.emit(lshr_(tmp3, imm(max_shift + 1, size * 2), tmp5))
    ctx.emit(str_(tmp5, tmp6))

    # or both halves of the result
    ctx.emit(or_(tmp4, tmp6, result))

    # compute carry flag (last bit that was shifted across)
    ctx.emit(and_(result, imm(1, size), tmp7))
    ctx.emit(bisnz_(tmp7, r('cf', 8)))

    if isinstance(b, reil.ImmediateOperand) and b.value == 1:
        # overflow flag is msb of input ^ msb of output; both bits are
        # reduced to 0/1 flags first so the xor compares like with like
        in_msb = ctx.tmp(8)
        out_msb = ctx.tmp(8)
        ctx.emit(and_(a, imm(sign_bit(size), size), tmp8))
        ctx.emit(bisnz_(tmp8, in_msb))
        ctx.emit(and_(result, imm(sign_bit(size), size), tmp8))
        ctx.emit(bisnz_(tmp8, out_msb))
        ctx.emit(xor_(in_msb, out_msb, r('of', 8)))
    else:
        ctx.emit(undef_(r('of', 8)))

    operand.set(ctx, i, 0, result)

    ctx.emit('zero_rotate')
    ctx.emit(nop_())
def x86_rol(ctx, i):
    """Translate x86 ROL (rotate left).

    Rotates operand 0 left by operand 1, with the count masked to the
    operand width minus one. When the masked count is zero, neither the
    destination nor the flags are touched. CF is the low bit of the result;
    OF is defined only for an immediate count of 1.
    """
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    size = a.size
    max_shift = size - 1

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(8)
    tmp2 = ctx.tmp(size * 2)
    tmp3 = ctx.tmp(size * 2)
    tmp4 = ctx.tmp(size)
    tmp5 = ctx.tmp(size * 2)
    # holds a truncated value, so it is operand-sized
    tmp6 = ctx.tmp(size)
    tmp7 = ctx.tmp(size)
    tmp8 = ctx.tmp(size)
    result = ctx.tmp(size)

    # the rotate amount is truncated at word_size - 1
    ctx.emit(and_(b, imm(max_shift, size), tmp0))

    # zero rotate doesn't affect flags
    ctx.emit(bisz_(tmp0, tmp1))
    ctx.emit(jcc_(tmp1, 'zero_rotate'))

    # zero extend
    ctx.emit(str_(a, tmp2))

    # left shift by the correct amount
    ctx.emit(lshl_(tmp2, tmp0, tmp3))

    # truncate to get first half of result
    ctx.emit(str_(tmp3, tmp4))

    # shift out then truncate to get second half of result
    ctx.emit(lshr_(tmp3, imm(max_shift + 1, size * 2), tmp5))
    ctx.emit(str_(tmp5, tmp6))

    # or both halves of the result
    ctx.emit(or_(tmp4, tmp6, result))

    # compute carry flag (last bit that was shifted across)
    ctx.emit(and_(result, imm(1, size), tmp7))
    ctx.emit(bisnz_(tmp7, r('cf', 8)))

    if isinstance(b, reil.ImmediateOperand) and b.value == 1:
        # overflow flag is msb of input ^ msb of output; normalise each
        # msb to a 0/1 flag before xoring so the bit positions line up
        in_msb = ctx.tmp(8)
        out_msb = ctx.tmp(8)
        ctx.emit(and_(a, imm(sign_bit(size), size), tmp8))
        ctx.emit(bisnz_(tmp8, in_msb))
        ctx.emit(and_(result, imm(sign_bit(size), size), tmp8))
        ctx.emit(bisnz_(tmp8, out_msb))
        ctx.emit(xor_(in_msb, out_msb, r('of', 8)))
    else:
        ctx.emit(undef_(r('of', 8)))

    operand.set(ctx, i, 0, result)

    ctx.emit('zero_rotate')
    ctx.emit(nop_())
def x86_movlpd(ctx, i):
    """Translate x86 MOVLPD: replace the low 64 bits of operand 0.

    The upper 64 bits of the destination are kept and operand 1 is OR-ed
    into the cleared lower half.
    """
    a = operand.get(ctx, i, 0)
    value = operand.get(ctx, i, 1)

    tmp0 = ctx.tmp(a.size)

    # keep only the high quadword of the destination
    ctx.emit(and_(a, imm(0xffffffffffffffff0000000000000000, 128), tmp0))
    # merge the new low quadword
    ctx.emit(or_(tmp0, value, tmp0))

    operand.set(ctx, i, 0, tmp0)
def x86_movlpd(ctx, i):
    """Translate x86 MOVLPD: write operand 1 into the low half of operand 0.

    The high 64 bits of the destination are preserved; the low 64 bits are
    cleared and then OR-ed with the source value.
    """
    a = operand.get(ctx, i, 0)
    value = operand.get(ctx, i, 1)

    tmp0 = ctx.tmp(a.size)

    # preserve the upper quadword, clear the lower one
    ctx.emit(and_(a, imm(0xffffffffffffffff0000000000000000, 128), tmp0))
    # drop the source into the cleared lower quadword
    ctx.emit(or_(tmp0, value, tmp0))

    operand.set(ctx, i, 0, tmp0)
def x86_pcmpgt(ctx, i, size):
    """Translate a packed signed greater-than compare over `size`-bit lanes.

    Each destination lane is set to all ones when the matching lane of `a`
    is greater than that of `b` (signed), and to zero otherwise. Operand
    indices are supplied by `vex_opnds`.
    """
    a_id, b_id, dst_id = vex_opnds(i)

    a = operand.get(ctx, i, a_id)
    b = operand.get(ctx, i, b_id)
    a_parts = unpack(ctx, a, size)
    b_parts = unpack(ctx, b, size)

    a_sign = ctx.tmp(size)
    a_low = ctx.tmp(size)
    b_sign = ctx.tmp(size)
    b_low = ctx.tmp(size)
    tmp0 = ctx.tmp(size * 2)
    a_low_gt_b_low = ctx.tmp(8)
    tmp1 = ctx.tmp(size)
    a_b_same_sign = ctx.tmp(8)
    a_nonneg = ctx.tmp(8)
    b_neg = ctx.tmp(8)
    a_nonneg_and_b_neg = ctx.tmp(8)
    cond = ctx.tmp(8)

    dst_parts = []
    for a_part, b_part in zip(a_parts, b_parts):
        dst_part = ctx.tmp(size)

        # separate the sign bit from the remaining low bits of each lane
        ctx.emit(and_(a_part, imm(sign_bit(size), size), a_sign))
        ctx.emit(and_(a_part, imm(~sign_bit(size), size), a_low))
        ctx.emit(and_(b_part, imm(sign_bit(size), size), b_sign))
        ctx.emit(and_(b_part, imm(~sign_bit(size), size), b_low))

        # a > b <==> (low(a) > low(b) and sign(a) == sign(b))
        #            or (a >= 0 and b < 0)

        # low(a) > low(b): b_low - a_low is negative in double width
        ctx.emit(sub_(b_low, a_low, tmp0))
        ctx.emit(and_(tmp0, imm(sign_bit(size * 2), size * 2), tmp0))
        ctx.emit(bisnz_(tmp0, a_low_gt_b_low))

        # sign(a) == sign(b)
        ctx.emit(xor_(a_sign, b_sign, tmp1))
        ctx.emit(bisz_(tmp1, a_b_same_sign))

        # a >= 0 and b < 0
        ctx.emit(bisz_(a_sign, a_nonneg))
        ctx.emit(bisnz_(b_sign, b_neg))
        ctx.emit(and_(a_nonneg, b_neg, a_nonneg_and_b_neg))

        ctx.emit(and_(a_low_gt_b_low, a_b_same_sign, cond))
        ctx.emit(or_(cond, a_nonneg_and_b_neg, cond))

        # turn the 0/1 condition into a lane-wide mask
        ctx.emit(mul_(cond, imm(mask(size), size), dst_part))

        dst_parts.append(dst_part)

    value = pack(ctx, dst_parts)
    operand.set(ctx, i, dst_id, value)
def x86_not(ctx, i):
    """Translate x86 NOT: bitwise complement of operand 0, in place."""
    source = operand.get(ctx, i, 0)
    width = source.size

    inverted = ctx.tmp(width)
    all_ones = imm(mask(width), width)

    # xor with an all-ones constant flips every bit
    ctx.emit(xor_(source, all_ones, inverted))

    operand.set(ctx, i, 0, inverted)
def x86_not(ctx, i):
    """Translate x86 NOT by xoring operand 0 with an all-ones mask."""
    value = operand.get(ctx, i, 0)
    bits = value.size

    complement = ctx.tmp(bits)

    # every bit set in the mask toggles the matching bit of the value
    ones = imm(mask(bits), bits)
    ctx.emit(xor_(value, ones, complement))

    operand.set(ctx, i, 0, complement)
def x86_shl(ctx, i):
    """Translate x86 SHL (shift left).

    The count is operand 1 when present; for the one-operand form it is 1
    if the mnemonic ends in '1', else `ctx.counter`. CF receives the bit
    shifted out, OF is computed only for a count of 1 (and zeroed
    otherwise), and the remaining flags are left to `_shift_set_flags`.
    """
    emit = ctx.emit
    a = operand.get(ctx, i, 0)

    if len(i.operands) != 1:
        count = operand.get(ctx, i, 1)
    elif i.mnemonic.endswith('1'):
        count = imm(1, a.size)
    else:
        count = ctx.counter

    size = a.size
    wide = size * 2
    max_shift = size - 1

    masked_count = ctx.tmp(size)
    extended = ctx.tmp(wide)
    shifted = ctx.tmp(wide)
    carry = ctx.tmp(wide)
    skip_of = ctx.tmp(8)
    msb = ctx.tmp(size)
    msb_clear = ctx.tmp(8)
    result = ctx.tmp(size)

    emit(and_(count, imm(max_shift, size), masked_count))

    # widen, shift, then narrow back to the operand size
    emit(str_(a, extended))
    emit(lshl_(extended, masked_count, shifted))
    emit(str_(shifted, result))

    # carry flag: the bit just above the operand width
    emit(and_(shifted, imm(carry_bit(size), wide), carry))
    emit(bisnz_(carry, r('cf', 8)))

    # overflow is only computed when the masked count is exactly 1
    emit(equ_(masked_count, imm(1, size), skip_of))
    emit(bisz_(skip_of, skip_of))
    emit(jcc_(skip_of, 'no_overflow_flag'))

    # of = (cf == (msb of result is clear))
    emit(and_(result, imm(sign_bit(size), size), msb))
    emit(bisz_(msb, msb_clear))
    emit(equ_(r('cf', 8), msb_clear, r('of', 8)))
    emit(jcc_(imm(1, 8), 'overflow_flag_done'))

    emit('no_overflow_flag')
    emit(str_(imm(0, 8), r('of', 8)))

    emit('overflow_flag_done')

    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def x86_shl(ctx, i):
    """Translate x86 SHL (shift left) of operand 0.

    Shift count: operand 1 if given; otherwise 1 when the mnemonic ends in
    '1', else `ctx.counter`. Sets CF from the shifted-out bit, handles OF
    for a count of 1 (0 otherwise) and defers other flags to
    `_shift_set_flags`.
    """
    a = operand.get(ctx, i, 0)

    implicit_count = len(i.operands) == 1
    if implicit_count and i.mnemonic.endswith('1'):
        amount = imm(1, a.size)
    elif implicit_count:
        amount = ctx.counter
    else:
        amount = operand.get(ctx, i, 1)

    size = a.size
    double = size * 2
    max_shift = size - 1

    amount_masked = ctx.tmp(size)
    widened = ctx.tmp(double)
    widened_shifted = ctx.tmp(double)
    carry_out = ctx.tmp(double)
    not_one = ctx.tmp(8)
    top_bit = ctx.tmp(size)
    top_bit_zero = ctx.tmp(8)
    result = ctx.tmp(size)

    ctx.emit(and_(amount, imm(max_shift, size), amount_masked))

    # zero extend
    ctx.emit(str_(a, widened))

    # left shift by the correct amount
    ctx.emit(lshl_(widened, amount_masked, widened_shifted))

    # truncate to get result
    ctx.emit(str_(widened_shifted, result))

    # compute carry flag
    ctx.emit(and_(widened_shifted, imm(carry_bit(size), double), carry_out))
    ctx.emit(bisnz_(carry_out, r('cf', 8)))

    # branch around the overflow computation unless the count is 1
    ctx.emit(equ_(amount_masked, imm(1, size), not_one))
    ctx.emit(bisz_(not_one, not_one))
    ctx.emit(jcc_(not_one, 'no_overflow_flag'))

    # compute overflow flag
    ctx.emit(and_(result, imm(sign_bit(size), size), top_bit))
    ctx.emit(bisz_(top_bit, top_bit_zero))
    ctx.emit(equ_(r('cf', 8), top_bit_zero, r('of', 8)))
    ctx.emit(jcc_(imm(1, 8), 'overflow_flag_done'))

    ctx.emit('no_overflow_flag')
    ctx.emit(str_(imm(0, 8), r('of', 8)))

    ctx.emit('overflow_flag_done')

    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def x86_pxor(ctx, i):
    """Translate x86 PXOR: xor operand 1 into operand 0.

    The result temporary is as wide as the narrower of the two operands.
    """
    lhs = operand.get(ctx, i, 0)
    rhs = operand.get(ctx, i, 1)

    width = min(lhs.size, rhs.size)
    xored = ctx.tmp(width)

    ctx.emit(xor_(lhs, rhs, xored))

    operand.set(ctx, i, 0, xored)
def x86_pxor(ctx, i):
    """Translate x86 PXOR (bitwise xor of operands 0 and 1 into operand 0).

    The temporary holding the result takes the smaller operand size.
    """
    dst_value = operand.get(ctx, i, 0)
    src_value = operand.get(ctx, i, 1)

    result = ctx.tmp(min(dst_value.size, src_value.size))
    ctx.emit(xor_(dst_value, src_value, result))

    operand.set(ctx, i, 0, result)
def x86_neg(ctx, i):
    """Translate x86 NEG: replace operand 0 with (0 - operand 0).

    The subtraction is performed into a double-width temporary and flags
    are derived by `_sub_set_flags` as for a regular subtraction from zero.
    """
    value = operand.get(ctx, i, 0)
    width = value.size

    negated = ctx.tmp(width * 2)
    zero = imm(0, width)

    ctx.emit(sub_(zero, value, negated))
    _sub_set_flags(ctx, zero, value, negated)

    operand.set(ctx, i, 0, negated)
def x86_xchg(ctx, i):
    """Translate x86 XCHG: swap the values of operands 0 and 1."""
    first = operand.get(ctx, i, 0)
    second = operand.get(ctx, i, 1)

    # snapshot the first operand before it is overwritten
    saved_first = ctx.tmp(first.size)
    ctx.emit(str_(first, saved_first))

    operand.set(ctx, i, 0, second)
    operand.set(ctx, i, 1, saved_first)
def x86_movzx(ctx, i):
    """Translate x86 MOVZX: move into operand 0 without sign extension.

    With a single operand the source is the accumulator; otherwise it is
    operand 1. The destination is written with `clear=True` and
    `sign_extend=False`.
    """
    has_explicit_source = len(i.operands) != 1
    source = operand.get(ctx, i, 1) if has_explicit_source else ctx.accumulator

    operand.set(ctx, i, 0, source, clear=True, sign_extend=False)
def x86_pop(ctx, i):
    """Translate x86 POP: load operand 0 from the stack and bump the pointer.

    The stack pointer is advanced by the popped value's size in bytes.
    """
    destination = operand.get(ctx, i, 0)
    popped = ctx.tmp(destination.size)

    stack_ptr = ctx.stack_ptr
    byte_count = imm(popped.size // 8, ctx.word_size)

    # read the top of stack, then release the slot
    ctx.emit(ldm_(stack_ptr, popped))
    ctx.emit(add_(stack_ptr, byte_count, stack_ptr))

    operand.set(ctx, i, 0, popped)
def x86_inc(ctx, i):
    """Translate x86 INC: add one to operand 0.

    Flags are set through `_add_set_flags` with `cf=False`.
    """
    value = operand.get(ctx, i, 0)
    one = imm(1, value.size)

    incremented = ctx.tmp(value.size * 2)
    ctx.emit(add_(value, one, incremented))

    _add_set_flags(ctx, value, one, incremented, cf=False)

    operand.set(ctx, i, 0, incremented)
def x86_psrldq(ctx, i):
    """Translate x86 PSRLDQ: shift operand 0 right by a byte count.

    The byte count is operand 1's immediate value, capped at 16.
    """
    target = operand.get(ctx, i, 0)
    count = operand.get(ctx, i, 1)

    shifted = ctx.tmp(target.size)

    # the count is in bytes; convert to bits for the shift
    byte_count = min(count.value, 16)
    bit_count = imm(byte_count * 8, 8)

    ctx.emit(lshr_(target, bit_count, shifted))

    operand.set(ctx, i, 0, shifted)
def x86_pslldq(ctx, i):
    """Translate x86 PSLLDQ: shift operand 0 left by a byte count.

    The byte count is operand 1's immediate value, capped at 16.
    """
    target = operand.get(ctx, i, 0)
    count = operand.get(ctx, i, 1)

    shifted = ctx.tmp(target.size)

    # the count is in bytes; convert to bits for the shift
    byte_count = min(count.value, 16)
    bit_count = imm(byte_count * 8, 8)

    ctx.emit(lshl_(target, bit_count, shifted))

    operand.set(ctx, i, 0, shifted)
def x86_xor(ctx, i):
    """Translate x86 XOR: operand 0 ^= operand 1.

    Operand 1 is fetched at operand 0's size. Flags are set by
    `_logic_set_flags` and the destination is written with `clear=True`.
    """
    lhs = operand.get(ctx, i, 0)
    rhs = operand.get(ctx, i, 1, lhs.size)

    width = min(lhs.size, rhs.size)
    xored = ctx.tmp(width)

    ctx.emit(xor_(lhs, rhs, xored))
    _logic_set_flags(ctx, xored)

    operand.set(ctx, i, 0, xored, clear=True)
def x86_xor(ctx, i):
    """Translate x86 XOR of operands 0 and 1 into operand 0.

    The second operand is requested at the first operand's size; flag
    updates are delegated to `_logic_set_flags`, and the destination write
    uses `clear=True`.
    """
    dst_value = operand.get(ctx, i, 0)
    src_value = operand.get(ctx, i, 1, dst_value.size)

    result = ctx.tmp(min(dst_value.size, src_value.size))
    ctx.emit(xor_(dst_value, src_value, result))

    _logic_set_flags(ctx, result)

    operand.set(ctx, i, 0, result, clear=True)
def x86_movhpd(ctx, i):
    """Translate x86 MOVHPD: replace the high 64 bits of operand 0.

    The low 64 bits of the destination are kept; operand 1 is widened,
    shifted up by 64 bits and OR-ed in.
    """
    emit = ctx.emit
    target = operand.get(ctx, i, 0)
    source = operand.get(ctx, i, 1)

    merged = ctx.tmp(target.size)
    high_half = ctx.tmp(target.size)

    # keep only the low quadword of the destination
    emit(and_(target, imm(0x0000000000000000ffffffffffffffff, 128), merged))

    # move the source into the high quadword position
    emit(str_(source, high_half))
    emit(lshl_(high_half, imm(64, 8), high_half))

    emit(or_(merged, high_half, merged))

    operand.set(ctx, i, 0, merged)
def x86_and(ctx, i):
    """Translate x86 AND: operand 0 &= operand 1.

    Operand 1 is fetched at operand 0's size and flags are set by
    `_logic_set_flags`.
    """
    lhs = operand.get(ctx, i, 0)
    rhs = operand.get(ctx, i, 1, lhs.size)

    width = min(lhs.size, rhs.size)
    anded = ctx.tmp(width)

    ctx.emit(and_(lhs, rhs, anded))
    _logic_set_flags(ctx, anded)

    operand.set(ctx, i, 0, anded)
def x86_and(ctx, i):
    """Translate x86 AND of operands 0 and 1 into operand 0.

    The second operand is requested at the first operand's size; flag
    updates are delegated to `_logic_set_flags`.
    """
    dst_value = operand.get(ctx, i, 0)
    src_value = operand.get(ctx, i, 1, dst_value.size)

    result = ctx.tmp(min(dst_value.size, src_value.size))
    ctx.emit(and_(dst_value, src_value, result))

    _logic_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def x86_movhpd(ctx, i):
    """Translate x86 MOVHPD: write operand 1 into the high half of operand 0.

    The destination's low 64 bits are preserved; the source is widened to
    the destination size, shifted left by 64 bits and merged with OR.
    """
    dst_value = operand.get(ctx, i, 0)
    src_value = operand.get(ctx, i, 1)

    combined = ctx.tmp(dst_value.size)
    upper = ctx.tmp(dst_value.size)

    low_quadword = imm(0x0000000000000000ffffffffffffffff, 128)

    # clear the high quadword of the destination
    ctx.emit(and_(dst_value, low_quadword, combined))

    # widen the source and lift it into the high quadword
    ctx.emit(str_(src_value, upper))
    ctx.emit(lshl_(upper, imm(64, 8), upper))

    ctx.emit(or_(combined, upper, combined))

    operand.set(ctx, i, 0, combined)
def _write_bit(ctx, i, base_index, offset_index, bit):
    """Write `bit` into the bit string named by an instruction operand.

    `base_index` selects the operand holding the bit string and
    `offset_index` the operand holding the bit offset. For a memory operand
    the affected byte is loaded, patched and stored back; for a register
    operand the whole register value is patched and written back.
    """
    if operand.is_memory(ctx, i, base_index):
        # nasty case, indexing into in-memory bitstring; offset can be
        # > word_size

        base = operand.get_address(ctx, i, base_index)
        offset = operand.get(ctx, i, offset_index)
        offset_sign = ctx.tmp(8)
        byte_offset = ctx.tmp(base.size)
        tmp0 = ctx.tmp(offset.size)
        # Work on copies so the offset operand itself is never overwritten.
        offset_magnitude = ctx.tmp(offset.size)
        bit_offset = ctx.tmp(offset.size)
        byte = ctx.tmp(8)
        bitmask = ctx.tmp(8)

        ctx.emit(and_(offset, imm(sign_bit(offset.size), offset.size), tmp0))
        ctx.emit(bisnz_(tmp0, offset_sign))
        ctx.emit(and_(offset, imm(~sign_bit(offset.size), offset.size),
                      offset_magnitude))
        ctx.emit(div_(offset_magnitude, imm(8, offset.size), byte_offset))
        ctx.emit(mod_(offset_magnitude, imm(8, offset.size), bit_offset))

        ctx.emit(jcc_(offset_sign, 'negative_offset'))
        ctx.emit(add_(base, byte_offset, base))
        ctx.emit(jcc_(imm(1, 8), 'base_calculated'))

        ctx.emit('negative_offset')
        ctx.emit(sub_(base, byte_offset, base))

        ctx.emit('base_calculated')
        ctx.emit(ldm_(base, byte))

        # clear the target bit in the loaded byte
        ctx.emit(lshl_(imm(1, 8), bit_offset, bitmask))
        ctx.emit(xor_(bitmask, imm(mask(8), 8), bitmask))
        ctx.emit(and_(byte, bitmask, byte))

        # move the new bit into position and merge the *shifted* value
        ctx.emit(lshl_(bit, bit_offset, bitmask))
        ctx.emit(or_(byte, bitmask, byte))

        ctx.emit(stm_(byte, base))

    else:
        # simple case, it's a register
        a = operand.get(ctx, i, base_index)
        offset = operand.get(ctx, i, offset_index)
        bitmask = ctx.tmp(a.size)
        tmp0 = ctx.tmp(a.size)
        tmp1 = ctx.tmp(a.size)

        # clear the target bit
        ctx.emit(lshl_(imm(1, a.size), offset, bitmask))
        ctx.emit(xor_(bitmask, imm(mask(a.size), a.size), bitmask))
        ctx.emit(and_(a, bitmask, tmp0))

        # widen the new bit, move it into position and merge
        ctx.emit(str_(bit, tmp1))
        ctx.emit(lshl_(tmp1, offset, tmp1))
        ctx.emit(or_(tmp0, tmp1, tmp1))

        operand.set(ctx, i, base_index, tmp1)
def x86_sub(ctx, i):
    """Translate x86 SUB: operand 0 -= operand 1.

    Operand 1 is passed through `_sign_extend` relative to operand 0, the
    difference is computed in a double-width temporary and flags are set by
    `_sub_set_flags`.
    """
    minuend = operand.get(ctx, i, 0)
    subtrahend = _sign_extend(ctx, minuend, operand.get(ctx, i, 1))

    difference = ctx.tmp(minuend.size * 2)
    ctx.emit(sub_(minuend, subtrahend, difference))

    _sub_set_flags(ctx, minuend, subtrahend, difference)

    operand.set(ctx, i, 0, difference)
def x86_andn(ctx, i):
    """Translate x86 ANDN: destination = (~first source) & second source.

    With three operands the sources are operands 1 and 2 and the result
    goes to operand 0. With two operands, operand 0 is both the inverted
    source and the destination. Flags are set by `_logic_set_flags`.
    """
    if len(i.operands) == 3:
        # three-operand form: separate destination
        a = operand.get(ctx, i, 1)
        b = operand.get(ctx, i, 2, a.size)
    else:
        # two-operand form: operand 0 doubles as the inverted source
        a = operand.get(ctx, i, 0)
        b = operand.get(ctx, i, 1, a.size)

    size = min(a.size, b.size)
    result = ctx.tmp(size)

    # invert the first source, then mask with the second
    ctx.emit(xor_(a, imm(mask(size), size), result))
    ctx.emit(and_(result, b, result))

    _logic_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def x86_andn(ctx, i):
    """Translate x86 ANDN: operand 0 = (~first source) & second source.

    In the three-operand form the sources are operands 1 and 2. In the
    two-operand form operand 0 is also the inverted source and operand 1
    the second source. Flag updates are delegated to `_logic_set_flags`.
    """
    if len(i.operands) == 3:
        # destination is distinct from both sources
        a = operand.get(ctx, i, 1)
        b = operand.get(ctx, i, 2, a.size)
    else:
        # destination is also the inverted source
        a = operand.get(ctx, i, 0)
        b = operand.get(ctx, i, 1, a.size)

    size = min(a.size, b.size)
    result = ctx.tmp(size)

    # complement the first source, then AND with the second
    ctx.emit(xor_(a, imm(mask(size), size), result))
    ctx.emit(and_(result, b, result))

    _logic_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def _write_bit(ctx, i, base_index, offset_index, bit):
    """Store `bit` at a bit offset inside the operand at `base_index`.

    The bit offset comes from the operand at `offset_index`. Memory
    operands are handled by loading, patching and storing a single byte;
    register operands are patched as a whole and written back.
    """
    if operand.is_memory(ctx, i, base_index):
        # nasty case, indexing into in-memory bitstring; offset can be
        # > word_size

        base = operand.get_address(ctx, i, base_index)
        offset = operand.get(ctx, i, offset_index)
        offset_sign = ctx.tmp(8)
        byte_offset = ctx.tmp(base.size)
        tmp0 = ctx.tmp(offset.size)
        # Separate temporaries keep the offset operand itself unmodified.
        offset_magnitude = ctx.tmp(offset.size)
        bit_offset = ctx.tmp(offset.size)
        byte = ctx.tmp(8)
        bitmask = ctx.tmp(8)

        ctx.emit(and_(offset, imm(sign_bit(offset.size), offset.size), tmp0))
        ctx.emit(bisnz_(tmp0, offset_sign))
        ctx.emit(and_(offset, imm(~sign_bit(offset.size), offset.size),
                      offset_magnitude))
        ctx.emit(div_(offset_magnitude, imm(8, offset.size), byte_offset))
        ctx.emit(mod_(offset_magnitude, imm(8, offset.size), bit_offset))

        ctx.emit(jcc_(offset_sign, 'negative_offset'))
        ctx.emit(add_(base, byte_offset, base))
        ctx.emit(jcc_(imm(1, 8), 'base_calculated'))

        ctx.emit('negative_offset')
        ctx.emit(sub_(base, byte_offset, base))

        ctx.emit('base_calculated')
        ctx.emit(ldm_(base, byte))

        # knock out the old bit
        ctx.emit(lshl_(imm(1, 8), bit_offset, bitmask))
        ctx.emit(xor_(bitmask, imm(mask(8), 8), bitmask))
        ctx.emit(and_(byte, bitmask, byte))

        # position the new bit and OR in the shifted value (not the raw bit)
        ctx.emit(lshl_(bit, bit_offset, bitmask))
        ctx.emit(or_(byte, bitmask, byte))

        ctx.emit(stm_(byte, base))

    else:
        # simple case, it's a register
        a = operand.get(ctx, i, base_index)
        offset = operand.get(ctx, i, offset_index)
        bitmask = ctx.tmp(a.size)
        tmp0 = ctx.tmp(a.size)
        tmp1 = ctx.tmp(a.size)

        # knock out the old bit
        ctx.emit(lshl_(imm(1, a.size), offset, bitmask))
        ctx.emit(xor_(bitmask, imm(mask(a.size), a.size), bitmask))
        ctx.emit(and_(a, bitmask, tmp0))

        # widen the new bit, position it and merge
        ctx.emit(str_(bit, tmp1))
        ctx.emit(lshl_(tmp1, offset, tmp1))
        ctx.emit(or_(tmp0, tmp1, tmp1))

        operand.set(ctx, i, base_index, tmp1)
def x86_adc(ctx, i):
    """Translate x86 ADC: operand 0 += operand 1 + CF.

    Operand 1 is passed through `_sign_extend` relative to operand 0; the
    sum is built in a double-width temporary and flags are set by
    `_add_set_flags`.
    """
    emit = ctx.emit
    augend = operand.get(ctx, i, 0)
    addend = _sign_extend(ctx, augend, operand.get(ctx, i, 1))

    total = ctx.tmp(augend.size * 2)

    emit(add_(augend, addend, total))
    # fold in the incoming carry
    emit(add_(total, r('cf', 8), total))

    _add_set_flags(ctx, augend, addend, total)

    operand.set(ctx, i, 0, total)
def x86_sbb(ctx, i):
    """Translate x86 SBB: operand 0 -= operand 1 + CF.

    Operand 1 is passed through `_sign_extend` relative to operand 0; the
    difference is built in a double-width temporary, flags are set by
    `_sub_set_flags` and the destination is written with `clear=True`.
    """
    emit = ctx.emit
    minuend = operand.get(ctx, i, 0)
    subtrahend = _sign_extend(ctx, minuend, operand.get(ctx, i, 1))

    difference = ctx.tmp(minuend.size * 2)

    emit(sub_(minuend, subtrahend, difference))
    # take away the incoming borrow
    emit(sub_(difference, r('cf', 8), difference))

    _sub_set_flags(ctx, minuend, subtrahend, difference)

    operand.set(ctx, i, 0, difference, clear=True)
def x86_pxor(ctx, i):
    """Translate PXOR using the operand indices given by `vex_opnds`.

    The two sources are xored into a temporary sized like the first source,
    which is then written to the destination operand.
    """
    a_id, b_id, dst_id = vex_opnds(i)

    lhs = operand.get(ctx, i, a_id)
    rhs = operand.get(ctx, i, b_id)

    xored = ctx.tmp(lhs.size)
    ctx.emit(xor_(lhs, rhs, xored))

    # TODO: this will clear all the remaining bits of the destination
    # register, which is incorrect for the legacy sse version. When ymmX
    # register support is added, this will be broken.
    operand.set(ctx, i, dst_id, xored)
def x86_sar(ctx, i):
    """Translate x86 SAR (arithmetic shift right).

    The count is operand 1 when present; for the one-operand form it is 1
    if the mnemonic ends in '1', else `ctx.counter`. CF receives the last
    bit shifted out, OF is cleared and the remaining flags are left to
    `_shift_set_flags`.
    """
    emit = ctx.emit
    a = operand.get(ctx, i, 0)

    if len(i.operands) != 1:
        count = operand.get(ctx, i, 1)
    elif i.mnemonic.endswith('1'):
        count = imm(1, a.size)
    else:
        count = ctx.counter

    size = a.size
    wide = size * 2
    max_shift = size - 1

    masked_count = ctx.tmp(size)
    extended = ctx.tmp(wide)
    raised = ctx.tmp(wide)
    shifted = ctx.tmp(wide)
    lost_bit = ctx.tmp(size)
    lowered = ctx.tmp(wide)
    result = ctx.tmp(a.size)

    # the shift amount is truncated at word_size - 1
    emit(and_(count, imm(max_shift, size), masked_count))

    # widen, then park the value in the upper half so its top bit becomes
    # the sign of the wide temporary
    emit(str_(a, extended))
    emit(lshl_(extended, imm(max_shift + 1, wide), raised))

    # right shift by the correct amount
    emit(ashr_(raised, masked_count, shifted))

    # the bit just below the upper half is the last one shifted out
    emit(and_(shifted, imm(sign_bit(size), wide), lost_bit))

    # bring the upper half back down and truncate
    emit(ashr_(shifted, imm(max_shift + 1, wide), lowered))
    emit(str_(lowered, result))

    # carry flag is the last bit shifted out
    emit(bisnz_(lost_bit, r('cf', 8)))

    # overflow flag is always 0
    emit(str_(imm(0, 8), r('of', 8)))

    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def x86_blsmsk(ctx, i):
    """Translate x86 BLSMSK (mask up to and including the lowest set bit).

    Computes (src - 1) ^ src for the source in operand 1 and writes it to
    operand 0. For a zero source this yields all ones. CF is set when the
    source is zero and cleared otherwise, SF follows the result, ZF and OF
    are cleared, PF and AF are undefined.
    """
    a = operand.get(ctx, i, 1)

    wide = ctx.tmp(a.size * 2)
    a_minus_one = ctx.tmp(a.size)
    result = ctx.tmp(a.size)

    # src - 1, computed wide and truncated so a zero source wraps to all ones
    ctx.emit(sub_(a, imm(1, a.size), wide))
    ctx.emit(str_(wide, a_minus_one))

    ctx.emit(xor_(a_minus_one, a, result))

    # CF is derived from the source before the destination is written, in
    # case source and destination are the same register
    ctx.emit(bisz_(a, r('cf', 8)))

    operand.set(ctx, i, 0, result, clear=True)

    set_sf(ctx, result)

    ctx.emit(str_(imm(0, 8), r('zf', 8)))
    ctx.emit(str_(imm(0, 8), r('of', 8)))
    ctx.emit(undef_(r('pf', 8)))
    ctx.emit(undef_(r('af', 8)))
def x86_sar(ctx, i):
    """Translate x86 SAR (arithmetic shift right) of operand 0.

    Shift count: operand 1 if given; otherwise 1 when the mnemonic ends in
    '1', else `ctx.counter`. Sets CF from the last bit shifted out, clears
    OF and defers other flags to `_shift_set_flags`.
    """
    a = operand.get(ctx, i, 0)

    implicit_count = len(i.operands) == 1
    if implicit_count and i.mnemonic.endswith('1'):
        amount = imm(1, a.size)
    elif implicit_count:
        amount = ctx.counter
    else:
        amount = operand.get(ctx, i, 1)

    size = a.size
    double = size * 2
    max_shift = size - 1

    amount_masked = ctx.tmp(size)
    widened = ctx.tmp(double)
    in_upper_half = ctx.tmp(double)
    after_shift = ctx.tmp(double)
    dropped_bit = ctx.tmp(size)
    back_down = ctx.tmp(double)
    result = ctx.tmp(a.size)

    # the shift amount is truncated at word_size - 1
    ctx.emit(and_(amount, imm(max_shift, size), amount_masked))

    # zero extend
    ctx.emit(str_(a, widened))

    # left shift all the way
    ctx.emit(lshl_(widened, imm(max_shift + 1, double), in_upper_half))

    # right shift by the correct amount
    ctx.emit(ashr_(in_upper_half, amount_masked, after_shift))

    # save off the first bit that is going to be lost
    ctx.emit(and_(after_shift, imm(sign_bit(size), double), dropped_bit))

    # shift out then truncate to get second half of result
    ctx.emit(ashr_(after_shift, imm(max_shift + 1, double), back_down))
    ctx.emit(str_(back_down, result))

    # set carry flag from the bit that was lost
    ctx.emit(bisnz_(dropped_bit, r('cf', 8)))

    # overflow flag is always 0
    ctx.emit(str_(imm(0, 8), r('of', 8)))

    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)
def x86_bswap(ctx, i):
    """Translate x86 BSWAP for a 32-bit operand: reverse its four bytes.

    Raises:
        pyreil.error.IllegalInstruction: if operand 0 is not 32 bits wide.
    """
    a = operand.get(ctx, i, 0)

    if a.size != 32:
        raise pyreil.error.IllegalInstruction(
            'bswap on non 32-bit value!')

    byte_values = [ctx.tmp(8) for _ in range(4)]

    # decompose into bytes, least significant first: truncate, then shift
    # the next byte down into the low position
    remaining = a
    for index, byte_value in enumerate(byte_values):
        if index:
            shifted = ctx.tmp(32)
            ctx.emit(lshr_(remaining, imm(8, 8), shifted))
            remaining = shifted
        ctx.emit(str_(remaining, byte_value))

    # put back together in the opposite order: the old low byte is inserted
    # first and so ends up in the most significant position
    result = ctx.tmp(32)
    ctx.emit(str_(byte_values[0], result))
    for byte_value in byte_values[1:]:
        widened = ctx.tmp(32)
        ctx.emit(str_(byte_value, widened))
        ctx.emit(lshl_(result, imm(8, 8), result))
        ctx.emit(or_(result, widened, result))

    operand.set(ctx, i, 0, result)
def x86_ror(ctx, i):
    """Translate x86 ROR (rotate right).

    Operand 0 is rotated right by operand 1, with the count masked to the
    operand width minus one. CF is not modelled yet; OF is marked undefined
    unless the count is an immediate 1 (where it is also not yet computed).
    """
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    size = a.size
    # Derived from the operand size (not the machine word size) so that the
    # "shift all the way" step below moves the value exactly into the upper
    # half of the double-width temporary.
    max_shift = size - 1

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(size * 2)
    tmp2 = ctx.tmp(size * 2)
    tmp3 = ctx.tmp(size * 2)
    tmp4 = ctx.tmp(size)
    tmp5 = ctx.tmp(size * 2)
    tmp6 = ctx.tmp(size)
    result = ctx.tmp(size)

    # the rotate amount is truncated at operand size - 1
    ctx.emit(and_(b, imm(max_shift, size), tmp0))

    # zero extend
    ctx.emit(str_(a, tmp1))

    # left shift all the way
    ctx.emit(lshl_(tmp1, imm(max_shift + 1, size * 2), tmp2))

    # right shift by the correct amount
    ctx.emit(lshr_(tmp2, tmp0, tmp3))

    # truncate to get first half of result
    ctx.emit(str_(tmp3, tmp4))

    # shift out then truncate to get second half of result
    ctx.emit(lshr_(tmp3, imm(max_shift + 1, size * 2), tmp5))
    ctx.emit(str_(tmp5, tmp6))

    # or both halves of the result
    ctx.emit(or_(tmp4, tmp6, result))

    # TODO: compute carry flag

    if isinstance(b, pyreil.ImmediateOperand) and b.value == 1:
        # TODO: compute overflow flag
        pass
    else:
        ctx.emit(undef_(r('of', 8)))

    operand.set(ctx, i, 0, result)
def x86_cmpxchg16b(ctx, i):
    """Translate x86 CMPXCHG16B (128-bit compare and exchange).

    rdx:rax is compared with operand 0. On a match ZF is set and rcx:rbx is
    stored to operand 0; otherwise ZF is cleared and operand 0 is loaded
    into rdx:rax.
    """
    emit = ctx.emit

    rdx = operand.get_register(ctx, i, 'rdx')
    rax = operand.get_register(ctx, i, 'rax')
    expected = ctx.tmp(128)

    rcx = operand.get_register(ctx, i, 'rcx')
    rbx = operand.get_register(ctx, i, 'rbx')
    replacement = ctx.tmp(128)

    value = operand.get(ctx, i, 0)

    low_wide = ctx.tmp(128)
    matches = ctx.tmp(8)
    result_rax = ctx.tmp(64)
    result_rdx = ctx.tmp(64)

    # expected = rdx:rax
    emit(lshl_(rdx, imm(64, 8), expected))
    emit(str_(rax, low_wide))
    emit(or_(expected, low_wide, expected))

    emit(equ_(value, expected, matches))
    emit(jcc_(matches, 'equal'))

    emit('not-equal')
    # split the memory value into its low and high quadwords
    emit(str_(value, result_rax))
    emit(lshr_(value, imm(64, 8), value))
    emit(str_(value, result_rdx))
    operand.set_register(ctx, i, 'rdx', result_rdx)
    operand.set_register(ctx, i, 'rax', result_rax)
    emit(str_(imm(0, 8), r('zf', 8)))
    emit(jcc_(imm(1, 8), 'done'))

    emit('equal')
    # replacement = rcx:rbx
    emit(lshl_(rcx, imm(64, 8), replacement))
    emit(str_(rbx, low_wide))
    emit(or_(replacement, low_wide, replacement))
    operand.set(ctx, i, 0, replacement)
    emit(str_(imm(1, 8), r('zf', 8)))

    emit('done')
    emit(nop_())
def x86_adox(ctx, i):
    """Translate x86 ADOX: operand 0 += operand 1 + OF.

    Operand 1 is passed through `_sign_extend` relative to operand 0. The
    only flag written is OF, which receives the carry out of the addition.
    """
    emit = ctx.emit
    augend = operand.get(ctx, i, 0)
    addend = _sign_extend(ctx, augend, operand.get(ctx, i, 1))

    wide = augend.size * 2
    total = ctx.tmp(wide)
    carry_out = ctx.tmp(wide)

    emit(add_(augend, addend, total))
    # OF acts as the incoming carry for this instruction
    emit(add_(total, r('of', 8), total))

    # the carry out goes back into OF; no other flag is touched
    emit(and_(total, imm(carry_bit(augend.size), total.size), carry_out))
    emit(bisnz_(carry_out, r('of', 8)))

    operand.set(ctx, i, 0, total)
def x86_imul(ctx, i):
    """Translate x86 IMUL in its one-, two- and three-operand forms.

    One operand: accumulator * operand 0; the low part of the product goes
    to `ctx.accumulator` and the high part to `ctx.data`. Two operands:
    operand 0 *= operand 1. Three operands: operand 0 = operand 1 *
    operand 2, with operand 2 sign-extended if narrower. Flags are handled
    by `_imul_set_flags` on the full double-width product.
    """
    if len(i.operands) == 1:
        # single operand form: the explicit operand is the multiplier
        # NOTE(review): `ctx.accumulator` is used at its own size; confirm
        # this matches the operand size for narrower multiplies.
        a = ctx.accumulator
        b = operand.get(ctx, i, 0)

        result = ctx.tmp(a.size * 2)
        high = ctx.tmp(a.size * 2)

        ctx.emit(mul_(a, b, result))

        # low half to the accumulator, high half to the data register; the
        # high half gets its own temporary so `result` still holds the full
        # product when the flags are computed
        ctx.emit(str_(result, ctx.accumulator))
        ctx.emit(lshr_(result, imm(a.size, 8), high))
        ctx.emit(str_(high, ctx.data))

        _imul_set_flags(ctx, result)

    elif len(i.operands) == 2:
        # double operand form
        a = operand.get(ctx, i, 0)
        b = operand.get(ctx, i, 1)

        result = ctx.tmp(a.size * 2)

        ctx.emit(mul_(a, b, result))

        operand.set(ctx, i, 0, result)

        _imul_set_flags(ctx, result)

    else:
        # triple operand form
        a = operand.get(ctx, i, 1)
        b = operand.get(ctx, i, 2)

        if b.size < a.size:
            # widen the narrower source with sign extension
            prev_b = b
            b = ctx.tmp(a.size)
            ctx.emit(sex_(prev_b, b))

        result = ctx.tmp(a.size * 2)

        ctx.emit(mul_(a, b, result))

        operand.set(ctx, i, 0, result)

        _imul_set_flags(ctx, result)