def emit_vec_pack_i(self, op, arglocs, regalloc):
    """Emit the code for packing integer elements into a vector register.

    ``arglocs`` unpacks to (resloc, vecloc, sourceloc, residxloc,
    srcidxloc, countloc, sizeloc); the last four carry plain integers
    in their ``.value``.  With a count of 1 a single element is moved
    between a core register and a vector register; otherwise ``count``
    elements are moved one by one through ``r.SCRATCH``.
    """
    assert isinstance(op, VectorOp)
    (resloc, vecloc, sourceloc,
     residxloc, srcidxloc, countloc, sizeloc) = arglocs
    dst_start = residxloc.value
    src_start = srcidxloc.value
    n_items = countloc.value
    src_itemsize = sizeloc.value
    dst_itemsize = op.bytesize
    mc = self.mc

    if n_items != 1:
        # vector -> vector: start from a copy of vecloc and overwrite
        # the selected elements one at a time
        assert resloc.is_vector_reg()
        assert sourceloc.is_vector_reg()
        mc.VLR(resloc, vecloc)
        for k in range(n_items):
            src_elem = l.addr(k + src_start)
            # load from sourceloc into GP reg and store back into resloc
            mc.VLGV(r.SCRATCH, sourceloc, src_elem,
                    l.itemsize_to_mask(src_itemsize))
            dst_elem = l.addr(k + dst_start)
            mc.VLVG(resloc, r.SCRATCH, dst_elem,
                    l.itemsize_to_mask(dst_itemsize))
        return

    if resloc.is_core_reg():
        # single element: vector register -> core register
        assert sourceloc.is_vector_reg()
        elem = l.addr(src_start)
        mc.VLGV(resloc, sourceloc, elem, l.itemsize_to_mask(src_itemsize))
        return

    # single element: core register -> element of (a copy of) vecloc
    assert sourceloc.is_core_reg()
    assert resloc.is_vector_reg()
    elem = l.addr(dst_start)
    mc.VLR(resloc, vecloc)
    mc.VLVG(resloc, sourceloc, elem, l.itemsize_to_mask(dst_itemsize))
def emit_vec_pack_i(self, op, arglocs, regalloc):
    """Emit the code for packing integer elements into a vector register.

    ``arglocs`` unpacks to (resloc, vecloc, sourceloc, residxloc,
    srcidxloc, countloc, sizeloc); the last four carry plain integers
    in their ``.value``.  With a count of 1 a single element is moved
    between a core register and a vector register; otherwise ``count``
    elements are moved one by one through ``r.SCRATCH``.
    """
    assert isinstance(op, VectorOp)
    (resloc, vecloc, sourceloc,
     residxloc, srcidxloc, countloc, sizeloc) = arglocs
    dst_start = residxloc.value
    src_start = srcidxloc.value
    n_items = countloc.value
    src_itemsize = sizeloc.value
    dst_itemsize = op.bytesize
    mc = self.mc

    if n_items != 1:
        # vector -> vector: start from a copy of vecloc and overwrite
        # the selected elements one at a time
        assert resloc.is_vector_reg()
        assert sourceloc.is_vector_reg()
        mc.VLR(resloc, vecloc)
        for k in range(n_items):
            src_elem = l.addr(k + src_start)
            # load from sourceloc into GP reg and store back into resloc
            mc.VLGV(r.SCRATCH, sourceloc, src_elem,
                    l.itemsize_to_mask(src_itemsize))
            dst_elem = l.addr(k + dst_start)
            mc.VLVG(resloc, r.SCRATCH, dst_elem,
                    l.itemsize_to_mask(dst_itemsize))
        return

    if resloc.is_core_reg():
        # single element: vector register -> core register
        assert sourceloc.is_vector_reg()
        elem = l.addr(src_start)
        mc.VLGV(resloc, sourceloc, elem, l.itemsize_to_mask(src_itemsize))
        return

    # single element: core register -> element of (a copy of) vecloc
    assert sourceloc.is_core_reg()
    assert resloc.is_vector_reg()
    elem = l.addr(dst_start)
    mc.VLR(resloc, vecloc)
    mc.VLVG(resloc, sourceloc, elem, l.itemsize_to_mask(dst_itemsize))
def emit_raw_call(self):
    """Emit the raw call, moving the stack pointer down first if needed.

    When ``self.subtracted_to_sp`` is non-zero, the word at 0(SP) is
    copied to the new stack top and SP is lowered by that amount
    before ``self.mc.raw_call()`` is emitted.
    """
    mc = self.mc
    delta = self.subtracted_to_sp
    # always allocate a stack frame for the new function
    # save the SP back chain
    # move the frame pointer
    if delta != 0:
        # rewrite the back chain
        mc.LG(r.SCRATCH, l.addr(0, r.SP))
        mc.STG(r.SCRATCH, l.addr(-delta, r.SP))
        mc.LAY(r.SP, l.addr(-delta, r.SP))
    mc.raw_call()
def emit_raw_call(self):
    """Emit the raw call, moving the stack pointer down first if needed.

    When ``self.subtracted_to_sp`` is non-zero, the word at 0(SP) is
    copied to the new stack top and SP is lowered by that amount
    before ``self.mc.raw_call()`` is emitted.
    """
    mc = self.mc
    delta = self.subtracted_to_sp
    # always allocate a stack frame for the new function
    # save the SP back chain
    # move the frame pointer
    if delta != 0:
        # rewrite the back chain
        mc.LG(r.SCRATCH, l.addr(0, r.SP))
        mc.STG(r.SCRATCH, l.addr(-delta, r.SP))
        mc.LAY(r.SP, l.addr(-delta, r.SP))
    mc.raw_call()
def test_load_byte_and_imm(self):
    """Load a byte from a known doubleword, NILL it with 0, expect 0."""
    asm = self.a
    mc = asm.mc
    adr = asm.datablockwrapper.malloc_aligned(16, 16)
    words = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr)
    words[0] = rffi.cast(rffi.ULONG, intmask(0xffffFFFFffff0001))
    mc.load_imm(r.r3, adr)
    mc.LG(r.r2, loc.addr(0, r.r3))
    # r2 is overwritten with the byte at offset 7 of the doubleword
    mc.LLGC(r.r2, loc.addr(7, r.r3))
    mc.NILL(r.r2, loc.imm(0x0))
    mc.BCR(con.ANY, r.r14)
    assert run_asm(asm) == 0
def test_float_load_zero(self):
    """Store f0 after LZDR into allocated memory and expect 0.0 there."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                # pool slot 0: address of the output buffer
                pool.addr(mem)
            mc.LZDR(r.f0)
            mc.LG(r.r11, loc.addr(0, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        run_asm(self.a)
        assert isclose(mem[0], 0.0)
def test_load_byte_and_imm(self):
    """Load a byte from a known doubleword, NILL it with 0, expect 0."""
    asm = self.a
    mc = asm.mc
    adr = asm.datablockwrapper.malloc_aligned(16, 16)
    words = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr)
    words[0] = rffi.cast(rffi.ULONG, intmask(0xffffFFFFffff0001))
    mc.load_imm(r.r3, adr)
    mc.LG(r.r2, loc.addr(0, r.r3))
    # r2 is overwritten with the byte at offset 7 of the doubleword
    mc.LLGC(r.r2, loc.addr(7, r.r3))
    mc.NILL(r.r2, loc.imm(0x0))
    mc.BCR(con.ANY, r.r14)
    assert run_asm(asm) == 0
def test_ag_overflow(self):
    """Add two inline 0x7fff...ff doublewords with AG and expect -2."""
    asm = self.a
    mc = asm.mc
    big = '\x7f' + '\xff' * 7
    # jump over the two 8-byte constants written right after the branch
    mc.BRC(con.ANY, loc.imm(4 + 8 + 8))
    mc.write(big)
    mc.write(big)
    mc.LARL(r.r5, loc.imm(-8))
    mc.LG(r.r4, loc.addr(8, r.r5))
    mc.AG(r.r4, loc.addr(0, r.r5))
    mc.LGR(r.r2, r.r4)
    asm.jmpto(r.r14)
    assert run_asm(asm) == -2
def test_float_load_zero(self):
    """Store f0 after LZDR into allocated memory and expect 0.0 there."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                # pool slot 0: address of the output buffer
                pool.addr(mem)
            mc.LZDR(r.f0)
            mc.LG(r.r11, loc.addr(0, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        run_asm(self.a)
        assert isclose(mem[0], 0.0)
def test_ag_overflow(self):
    """Add two inline 0x7fff...ff doublewords with AG and expect -2."""
    asm = self.a
    mc = asm.mc
    big = '\x7f' + '\xff' * 7
    # jump over the two 8-byte constants written right after the branch
    mc.BRC(con.ANY, loc.imm(4 + 8 + 8))
    mc.write(big)
    mc.write(big)
    mc.LARL(r.r5, loc.imm(-8))
    mc.LG(r.r4, loc.addr(8, r.r5))
    mc.AG(r.r4, loc.addr(0, r.r5))
    mc.LGR(r.r2, r.r4)
    asm.jmpto(r.r14)
    assert run_asm(asm) == -2
def _call_assembler_load_result(self, op, result_loc):
    """Load the result of a call_assembler from the frame addressed by r2.

    Nothing is emitted for VOID operations.  FLOAT results go to r.f0,
    everything else to r.r2; ``result_loc`` must be that register.
    """
    kind = op.type
    if kind == VOID:
        return
    # load the return value from the dead frame's value index 0
    descr = self.cpu.getarraydescr_for_frame(kind)
    ofs = self.cpu.unpack_arraydescr(descr)
    source = l.addr(ofs, r.r2)
    if kind == FLOAT:
        assert result_loc is r.f0
        self.mc.LD(r.f0, source)
    else:
        assert result_loc is r.r2
        self.mc.LG(r.r2, source)
def _call_assembler_load_result(self, op, result_loc):
    """Load the result of a call_assembler from the frame addressed by r2.

    Nothing is emitted for VOID operations.  FLOAT results go to r.f0,
    everything else to r.r2; ``result_loc`` must be that register.
    """
    kind = op.type
    if kind == VOID:
        return
    # load the return value from the dead frame's value index 0
    descr = self.cpu.getarraydescr_for_frame(kind)
    ofs = self.cpu.unpack_arraydescr(descr)
    source = l.addr(ofs, r.r2)
    if kind == FLOAT:
        assert result_loc is r.f0
        self.mc.LD(r.f0, source)
    else:
        assert result_loc is r.r2
        self.mc.LG(r.r2, source)
def test_float_cmp(self):
    """Compare 1.0 with 2.0; the EQ branch must fall through, giving 1."""
    mc = self.mc
    with ActivationRecordCtx(self):
        with LiteralPoolCtx(self) as pool:
            pool.float(1.0)
            pool.float(2.0)
        mc.LD(r.f0, loc.addr(0, r.r13))
        mc.LD(r.f1, loc.addr(8, r.r13))
        mc.CDBR(r.f0, r.f1)
        mc.LGHI(r.r2, loc.imm(0))
        mc.BCR(con.EQ, r.r14)  # must not branch
        mc.LGHI(r.r2, loc.imm(1))
    self.a.jmpto(r.r14)
    assert run_asm(self.a) == 1
def test_cast_single_float_to_float(self):
    """Convert the single float 6.66 with LDEBR and check the stored double."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                pool.single_float(6.66)
                pool.addr(mem)
            mc.LEY(r.f1, loc.addr(0, r.r13))
            ## cast short to long!
            mc.LDEBR(r.f0, r.f1)
            # the buffer address is read from pool offset 4
            mc.LG(r.r11, loc.addr(4, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        run_asm(self.a)
        assert isclose(mem[0], 6.66, abs_tol=0.05)
def test_cast_int64_to_float(self):
    """Convert the integer 12345 with CDGBR and check the stored double."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                pool.int64(12345)
                pool.addr(mem)
            mc.LG(r.r12, loc.addr(0, r.r13))
            # cast int to float!
            mc.CDGBR(r.f0, r.r12)
            # the buffer address is read from pool offset 8
            mc.LG(r.r11, loc.addr(8, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        run_asm(self.a)
        assert isclose(mem[0], 12345.0)
def test_cast_single_float_to_float(self):
    """Convert the single float 6.66 with LDEBR and check the stored double."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                pool.single_float(6.66)
                pool.addr(mem)
            mc.LEY(r.f1, loc.addr(0, r.r13))
            ## cast short to long!
            mc.LDEBR(r.f0, r.f1)
            # the buffer address is read from pool offset 4
            mc.LG(r.r11, loc.addr(4, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        run_asm(self.a)
        assert isclose(mem[0], 6.66, abs_tol=0.05)
def prepare_int_shift(self, op):
    """Compute the locations for an integer shift operation.

    Returns ``[result_loc, value_loc, amount_loc]``.  A constant shift
    amount is encoded directly as an address displacement; otherwise
    the amount is put in a register used as the address base.
    """
    value_box = op.getarg(0)
    amount_box = op.getarg(1)
    if isinstance(amount_box, ConstInt):
        # note that the shift value is stored
        # in the addr part of the instruction
        amount_loc = addr(amount_box.getint())
    else:
        amount_reg = self.rm.ensure_reg(amount_box)
        amount_loc = addr(0, amount_reg)
    value_loc = self.ensure_reg(value_box)
    result_loc = self.force_allocate_reg(op)
    return [result_loc, value_loc, amount_loc]
def test_cast_int64_to_float(self):
    """Convert the integer 12345 with CDGBR and check the stored double."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                pool.int64(12345)
                pool.addr(mem)
            mc.LG(r.r12, loc.addr(0, r.r13))
            # cast int to float!
            mc.CDGBR(r.f0, r.r12)
            # the buffer address is read from pool offset 8
            mc.LG(r.r11, loc.addr(8, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        run_asm(self.a)
        assert isclose(mem[0], 12345.0)
def test_float_cmp(self):
    """Compare 1.0 with 2.0; the EQ branch must fall through, giving 1."""
    mc = self.mc
    with ActivationRecordCtx(self):
        with LiteralPoolCtx(self) as pool:
            pool.float(1.0)
            pool.float(2.0)
        mc.LD(r.f0, loc.addr(0, r.r13))
        mc.LD(r.f1, loc.addr(8, r.r13))
        mc.CDBR(r.f0, r.f1)
        mc.LGHI(r.r2, loc.imm(0))
        mc.BCR(con.EQ, r.r14)  # must not branch
        mc.LGHI(r.r2, loc.imm(1))
    self.a.jmpto(r.r14)
    assert run_asm(self.a) == 1
def read_real_errno(self, save_err):
    """Emit code copying the real errno into the thread-local copy.

    Does nothing unless ``save_err`` has ``rffi.RFFI_SAVE_ERRNO`` set.
    ``rffi.RFFI_ALT_ERRNO`` selects the alternate thread-local slot.
    """
    if not (save_err & rffi.RFFI_SAVE_ERRNO):
        return
    # Just after a call, read the real 'errno' and save a copy of
    # it inside our thread-local '*_errno'.  Registers r3-r6
    # never contain anything after the call.
    cpu = self.asm.cpu
    if save_err & rffi.RFFI_ALT_ERRNO:
        rpy_errno = llerrno.get_alt_errno_offset(cpu)
    else:
        rpy_errno = llerrno.get_rpy_errno_offset(cpu)
    p_errno = llerrno.get_p_errno_offset(cpu)
    mc = self.mc
    mc.LG(r.r3, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
    mc.LG(r.r4, l.addr(p_errno, r.r3))
    mc.LGF(r.r4, l.addr(0, r.r4))
    mc.STY(r.r4, l.addr(rpy_errno, r.r3))
def test_float_mul_to_memory(self, v1, v2, res):
    """Multiply v1 by v2 with MDB, store to memory, compare with res."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                # pool layout: v1 at 0, v2 at 8, buffer address at 16
                pool.float(v1)
                pool.float(v2)
                pool.addr(mem)
            mc.LD(r.f0, loc.addr(0, r.r13))
            mc.MDB(r.f0, loc.addr(8, r.r13))
            mc.LG(r.r11, loc.addr(16, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        self.a.jmpto(r.r14)
        run_asm(self.a)
        assert isclose(mem[0], res)
def _load_address(self, base_loc, index_loc, offset_imm):
    """Build an address location from base, index and immediate offset.

    An immediate index is folded into the offset; if the sum is not
    supported as a memory offset it is loaded into ``r.SCRATCH`` and
    used as index register.  A non-immediate index requires the
    offset itself to be supported.
    """
    assert offset_imm.is_imm()
    displacement = offset_imm.value
    if not index_loc.is_imm():
        assert self._mem_offset_supported(displacement)
        return l.addr(displacement, base_loc, index_loc)
    displacement = index_loc.value + displacement
    if self._mem_offset_supported(displacement):
        return l.addr(displacement, base_loc)
    # offset too large to encode: go through the scratch register
    self.mc.load_imm(r.SCRATCH, displacement)
    return l.addr(0, base_loc, r.SCRATCH)
def read_real_errno(self, save_err):
    """Emit code copying the real errno into the thread-local copy.

    Does nothing unless ``save_err`` has ``rffi.RFFI_SAVE_ERRNO`` set.
    ``rffi.RFFI_ALT_ERRNO`` selects the alternate thread-local slot.
    """
    if not (save_err & rffi.RFFI_SAVE_ERRNO):
        return
    # Just after a call, read the real 'errno' and save a copy of
    # it inside our thread-local '*_errno'.  Registers r3-r6
    # never contain anything after the call.
    cpu = self.asm.cpu
    if save_err & rffi.RFFI_ALT_ERRNO:
        rpy_errno = llerrno.get_alt_errno_offset(cpu)
    else:
        rpy_errno = llerrno.get_rpy_errno_offset(cpu)
    p_errno = llerrno.get_p_errno_offset(cpu)
    mc = self.mc
    mc.LG(r.r3, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP))
    mc.LG(r.r4, l.addr(p_errno, r.r3))
    mc.LGF(r.r4, l.addr(0, r.r4))
    mc.STY(r.r4, l.addr(rpy_errno, r.r3))
def _load_address(self, base_loc, index_loc, offset_imm):
    """Build an address location from base, index and immediate offset.

    An immediate index is folded into the offset; if the sum is not
    supported as a memory offset it is loaded into ``r.SCRATCH`` and
    used as index register.  A non-immediate index requires the
    offset itself to be supported.
    """
    assert offset_imm.is_imm()
    displacement = offset_imm.value
    if not index_loc.is_imm():
        assert self._mem_offset_supported(displacement)
        return l.addr(displacement, base_loc, index_loc)
    displacement = index_loc.value + displacement
    if self._mem_offset_supported(displacement):
        return l.addr(displacement, base_loc)
    # offset too large to encode: go through the scratch register
    self.mc.load_imm(r.SCRATCH, displacement)
    return l.addr(0, base_loc, r.SCRATCH)
def test_float_mul_to_memory(self, v1, v2, res):
    """Multiply v1 by v2 with MDB, store to memory, compare with res."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with LiteralPoolCtx(self) as pool:
                # pool layout: v1 at 0, v2 at 8, buffer address at 16
                pool.float(v1)
                pool.float(v2)
                pool.addr(mem)
            mc.LD(r.f0, loc.addr(0, r.r13))
            mc.MDB(r.f0, loc.addr(8, r.r13))
            mc.LG(r.r11, loc.addr(16, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        self.a.jmpto(r.r14)
        run_asm(self.a)
        assert isclose(mem[0], res)
def test_simple_func(self):
    """Call a tiny inline function that returns 321 via BRASL."""
    asm = self.a
    mc = asm.mc
    frame = 96
    # enter
    mc.STMG(r.r11, r.r15, loc.addr(-frame, r.SP))
    mc.AHI(r.SP, loc.imm(-frame))
    # from the start of BRASL to end of jmpto there are 8+6 bytes
    mc.BRASL(r.r14, loc.imm(8 + 6))
    mc.LMG(r.r11, r.r15, loc.addr(0, r.SP))
    asm.jmpto(r.r14)

    # the callee starts here and must sit on an even address
    callee_pos = mc.get_relative_pos()
    assert callee_pos & 0x1 == 0
    gen_func_prolog(mc)
    mc.LGHI(r.r2, loc.imm(321))
    gen_func_epilog(mc)
    assert run_asm(asm) == 321
def test_simple_func(self):
    """Call a tiny inline function that returns 321 via BRASL."""
    asm = self.a
    mc = asm.mc
    frame = 96
    # enter
    mc.STMG(r.r11, r.r15, loc.addr(-frame, r.SP))
    mc.AHI(r.SP, loc.imm(-frame))
    # from the start of BRASL to end of jmpto there are 8+6 bytes
    mc.BRASL(r.r14, loc.imm(8 + 6))
    mc.LMG(r.r11, r.r15, loc.addr(0, r.SP))
    asm.jmpto(r.r14)

    # the callee starts here and must sit on an even address
    callee_pos = mc.get_relative_pos()
    assert callee_pos & 0x1 == 0
    gen_func_prolog(mc)
    mc.LGHI(r.r2, loc.imm(321))
    gen_func_epilog(mc)
    assert run_asm(asm) == 321
def emit_vec_int_signext(self, op, arglocs, regalloc):
    """Emit a vector integer sign extension from ``loc0`` into ``resloc``.

    Equal sizes are a plain move.  For 4 <-> 8 byte conversions the
    elements 0 and 1 are moved through ``r.SCRATCH``; when the new
    size is 8, a VSEG on the result follows.  Other size combinations
    emit nothing.
    """
    resloc, loc0, osizeloc, nsizeloc = arglocs
    # signext is only allowed if the data type sizes do not change.
    # e.g. [byte,byte] = sign_ext([byte, byte]), a simple move is sufficient!
    old_size = osizeloc.value
    new_size = nsizeloc.value
    if old_size == new_size:
        self.regalloc_mov(loc0, resloc)
        return
    widening = (old_size == 4 and new_size == 8)
    narrowing = (old_size == 8 and new_size == 4)
    if not (widening or narrowing):
        return
    mc = self.mc
    for lane in range(2):
        mc.VLGV(r.SCRATCH, loc0, l.addr(lane), l.itemsize_to_mask(old_size))
        mc.VLVG(resloc, r.SCRATCH, l.addr(lane), l.itemsize_to_mask(new_size))
    if widening:
        # here new_size == 8
        mc.VSEG(resloc, resloc, l.itemsize_to_mask(old_size))
def test_literal_pool(self):
    """Load an 8-byte constant written inline right after a BRAS."""
    asm = self.a
    mc = asm.mc
    gen_func_prolog(mc)
    # branch over the 8 constant bytes that follow
    mc.BRAS(r.r13, loc.imm(8 + self.mc.BRAS_byte_count))
    mc.write('\x08\x07\x06\x05\x04\x03\x02\x01')
    mc.LG(r.r2, loc.addr(0, r.r13))
    gen_func_epilog(mc)
    assert run_asm(asm) == 0x0807060504030201
def emit_zero_array(self, op, arglocs, regalloc):
    """Emit code that zeroes a region of an array using MVCLE.

    ``arglocs`` unpacks to (base_loc, startindex_loc, length_loc,
    ofs_loc, itemsize_loc); ``itemsize_loc`` is not used here.
    ``base_loc`` is modified in place: the offset and the start index
    are added to it before the MVCLE is emitted.
    """
    base_loc, startindex_loc, length_loc, \
        ofs_loc, itemsize_loc = arglocs

    # base_loc += ofs
    if ofs_loc.is_imm():
        assert check_imm_value(ofs_loc.value)
        self.mc.AGHI(base_loc, ofs_loc)
    else:
        self.mc.AGR(base_loc, ofs_loc)
    # base_loc += startindex
    if startindex_loc.is_imm():
        assert check_imm_value(startindex_loc.value)
        self.mc.AGHI(base_loc, startindex_loc)
    else:
        self.mc.AGR(base_loc, startindex_loc)
    assert not length_loc.is_imm()
    # contents of r0 do not matter because r1 is zero, so
    # no copying takes place
    self.mc.XGR(r.r1, r.r1)

    # destination must be an even/odd register pair (address, length)
    assert base_loc.is_even()
    assert length_loc.value == base_loc.value + 1

    # s390x has memset directly as a hardware instruction!!
    # it needs 5 registers allocated
    # dst = rX, dst len = rX+1 (ensured by the regalloc)
    # src = r0, src len = r1
    self.mc.MVCLE(base_loc, r.r0, l.addr(0))
    # NOTE this instruction can (determined by the cpu), just
    # quit the movement any time, thus it is looped until all bytes
    # are copied!
    self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def test_literal_pool(self):
    """Load an 8-byte constant written inline right after a BRAS."""
    asm = self.a
    mc = asm.mc
    gen_func_prolog(mc)
    # branch over the 8 constant bytes that follow
    mc.BRAS(r.r13, loc.imm(8 + self.mc.BRAS_byte_count))
    mc.write('\x08\x07\x06\x05\x04\x03\x02\x01')
    mc.LG(r.r2, loc.addr(0, r.r13))
    gen_func_epilog(mc)
    assert run_asm(asm) == 0x0807060504030201
def emit_zero_array(self, op, arglocs, regalloc):
    """Emit code that zeroes a region of an array using MVCLE.

    ``arglocs`` unpacks to (base_loc, startindex_loc, length_loc,
    ofs_loc, itemsize_loc); ``itemsize_loc`` is not used here.
    ``base_loc`` is modified in place: the offset and the start index
    are added to it before the MVCLE is emitted.
    """
    base_loc, startindex_loc, length_loc, \
        ofs_loc, itemsize_loc = arglocs

    # base_loc += ofs
    if ofs_loc.is_imm():
        assert check_imm_value(ofs_loc.value)
        self.mc.AGHI(base_loc, ofs_loc)
    else:
        self.mc.AGR(base_loc, ofs_loc)
    # base_loc += startindex
    if startindex_loc.is_imm():
        assert check_imm_value(startindex_loc.value)
        self.mc.AGHI(base_loc, startindex_loc)
    else:
        self.mc.AGR(base_loc, startindex_loc)
    assert not length_loc.is_imm()
    # contents of r0 do not matter because r1 is zero, so
    # no copying takes place
    self.mc.XGR(r.r1, r.r1)

    # destination must be an even/odd register pair (address, length)
    assert base_loc.is_even()
    assert length_loc.value == base_loc.value + 1

    # s390x has memset directly as a hardware instruction!!
    # it needs 5 registers allocated
    # dst = rX, dst len = rX+1 (ensured by the regalloc)
    # src = r0, src len = r1
    self.mc.MVCLE(base_loc, r.r0, l.addr(0))
    # NOTE this instruction can (determined by the cpu), just
    # quit the movement any time, thus it is looped until all bytes
    # are copied!
    self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def _emit_copycontent(self, arglocs, is_unicode):
    """Emit code copying string or unicode content using MVCLE.

    ``arglocs`` is [src_ptr_loc, dst_ptr_loc, src_ofs_loc, dst_ofs_loc,
    length_loc].  ``is_unicode`` selects the rstr.UNICODE layout
    (item size 2 or 4) instead of rstr.STR (item size 1).  The register
    of ``src_ptr_loc`` and its odd partner, as well as r0 and r1, are
    overwritten.
    """
    [src_ptr_loc, dst_ptr_loc,
     src_ofs_loc, dst_ofs_loc, length_loc] = arglocs

    # scale is log2(itemsize); it turns item counts into byte counts
    if is_unicode:
        basesize, itemsize, _ = symbolic.get_array_token(
            rstr.UNICODE, self.cpu.translate_support_code)
        if itemsize == 2:
            scale = 1
        elif itemsize == 4:
            scale = 2
        else:
            raise AssertionError
    else:
        basesize, itemsize, _ = symbolic.get_array_token(
            rstr.STR, self.cpu.translate_support_code)
        assert itemsize == 1
        basesize -= 1  # for the extra null character
        scale = 0

    # src and src_len are tmp registers
    src = src_ptr_loc
    src_len = r.odd_reg(src)
    dst = r.r0
    dst_len = r.r1
    self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
    self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)

    # dst_len = length << scale
    if length_loc.is_imm():
        length = length_loc.getint()
        self.mc.load_imm(dst_len, length << scale)
    else:
        if scale > 0:
            self.mc.SLLG(dst_len, length_loc, l.addr(scale))
        else:
            self.mc.LGR(dst_len, length_loc)
    # ensure that src_len is as long as dst_len, otherwise
    # padding bytes are written to dst
    self.mc.LGR(src_len, dst_len)

    # skip the header in front of the items
    self.mc.AGHI(src, l.imm(basesize))
    self.mc.AGHI(dst, l.imm(basesize))

    # s390x has memset directly as a hardware instruction!!
    # NOTE(review): here MVCLE gets a real source (src/src_len), so it
    # acts as a copy rather than a memset -- confirm against the ISA
    # 0xB8 means we might reference dst later
    self.mc.MVCLE(dst, src, l.addr(0xB8))
    # NOTE this instruction can (determined by the cpu), just
    # quit the movement any time, thus it is looped until all bytes
    # are copied!
    self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def test_float_to_memory(self, v1, v2, res):
    """Add v1 and v2 with ADBR, store to memory, compare with res."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with self.label('lit'):
                # inline literals: v1 at 0, v2 at 8, buffer address at 16
                mc.BRAS(r.r13, loc.imm(0))
                mc.write(BFL(v1))
                mc.write(BFL(v2))
                mc.write(ADDR(mem))
            self.jump_here(self.mc.BRAS, 'lit')
            mc.LD(r.f0, loc.addr(0, r.r13))
            mc.LD(r.f1, loc.addr(8, r.r13))
            mc.ADBR(r.f0, r.f1)
            mc.LG(r.r11, loc.addr(16, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        self.a.jmpto(r.r14)
        run_asm(self.a)
        assert isclose(mem[0], res)
def test_load_byte_zero_extend(self):
    """LLGC of the byte at adr+7 of 0xffff...ff02 must return 2."""
    asm = self.a
    mc = asm.mc
    adr = asm.datablockwrapper.malloc_aligned(16, 16)
    words = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr)
    words[0] = rffi.cast(rffi.ULONG, intmask(0xffffFFFFffffFF02))
    mc.load_imm(r.r3, adr + 7)
    mc.LLGC(r.r2, loc.addr(0, r.r3))
    mc.BCR(con.ANY, r.r14)
    assert run_asm(asm) == 2
def _emit_copycontent(self, arglocs, is_unicode):
    """Emit code copying string or unicode content using MVCLE.

    ``arglocs`` is [src_ptr_loc, dst_ptr_loc, src_ofs_loc, dst_ofs_loc,
    length_loc].  ``is_unicode`` selects the rstr.UNICODE layout
    (item size 2 or 4) instead of rstr.STR (item size 1).  The register
    of ``src_ptr_loc`` and its odd partner, as well as r0 and r1, are
    overwritten.
    """
    [src_ptr_loc, dst_ptr_loc,
     src_ofs_loc, dst_ofs_loc, length_loc] = arglocs

    # scale is log2(itemsize); it turns item counts into byte counts
    if is_unicode:
        basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE,
                                    self.cpu.translate_support_code)
        if itemsize == 2:
            scale = 1
        elif itemsize == 4:
            scale = 2
        else:
            raise AssertionError
    else:
        basesize, itemsize, _ = symbolic.get_array_token(rstr.STR,
                                    self.cpu.translate_support_code)
        assert itemsize == 1
        scale = 0

    # src and src_len are tmp registers
    src = src_ptr_loc
    src_len = r.odd_reg(src)
    dst = r.r0
    dst_len = r.r1
    self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale)
    self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale)

    # dst_len = length << scale
    if length_loc.is_imm():
        length = length_loc.getint()
        self.mc.load_imm(dst_len, length << scale)
    else:
        if scale > 0:
            self.mc.SLLG(dst_len, length_loc, l.addr(scale))
        else:
            self.mc.LGR(dst_len, length_loc)
    # ensure that src_len is as long as dst_len, otherwise
    # padding bytes are written to dst
    self.mc.LGR(src_len, dst_len)

    # skip the header in front of the items
    self.mc.AGHI(src, l.imm(basesize))
    self.mc.AGHI(dst, l.imm(basesize))

    # s390x has memset directly as a hardware instruction!!
    # NOTE(review): here MVCLE gets a real source (src/src_len), so it
    # acts as a copy rather than a memset -- confirm against the ISA
    # 0xB8 means we might reference dst later
    self.mc.MVCLE(dst, src, l.addr(0xB8))
    # NOTE this instruction can (determined by the cpu), just
    # quit the movement any time, thus it is looped until all bytes
    # are copied!
    self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count))
def emit_vec_int_signext(self, op, arglocs, regalloc):
    """Emit a vector integer sign extension from ``loc0`` into ``resloc``.

    Equal sizes are a plain move.  For 4 <-> 8 byte conversions the
    elements 0 and 1 are moved through ``r.SCRATCH``; when the new
    size is 8, a VSEG on the result follows.  Other size combinations
    emit nothing.
    """
    resloc, loc0, osizeloc, nsizeloc = arglocs
    # signext is only allowed if the data type sizes do not change.
    # e.g. [byte,byte] = sign_ext([byte, byte]), a simple move is sufficient!
    old_size = osizeloc.value
    new_size = nsizeloc.value
    if old_size == new_size:
        self.regalloc_mov(loc0, resloc)
        return
    widening = (old_size == 4 and new_size == 8)
    narrowing = (old_size == 8 and new_size == 4)
    if not (widening or narrowing):
        return
    mc = self.mc
    for lane in range(2):
        mc.VLGV(r.SCRATCH, loc0, l.addr(lane), l.itemsize_to_mask(old_size))
        mc.VLVG(resloc, r.SCRATCH, l.addr(lane), l.itemsize_to_mask(new_size))
    if widening:
        # here new_size == 8
        mc.VSEG(resloc, resloc, l.itemsize_to_mask(old_size))
def test_load_byte_zero_extend(self):
    """LLGC of the byte at adr+7 of 0xffff...ff02 must return 2."""
    asm = self.a
    mc = asm.mc
    adr = asm.datablockwrapper.malloc_aligned(16, 16)
    words = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr)
    words[0] = rffi.cast(rffi.ULONG, intmask(0xffffFFFFffffFF02))
    mc.load_imm(r.r3, adr + 7)
    mc.LLGC(r.r2, loc.addr(0, r.r3))
    mc.BCR(con.ANY, r.r14)
    assert run_asm(asm) == 2
def test_float_to_memory(self, v1, v2, res):
    """Add v1 and v2 with ADBR, store to memory, compare with res."""
    mc = self.mc
    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 16) as mem:
        with ActivationRecordCtx(self):
            with self.label('lit'):
                # inline literals: v1 at 0, v2 at 8, buffer address at 16
                mc.BRAS(r.r13, loc.imm(0))
                mc.write(BFL(v1))
                mc.write(BFL(v2))
                mc.write(ADDR(mem))
            self.jump_here(self.mc.BRAS, 'lit')
            mc.LD(r.f0, loc.addr(0, r.r13))
            mc.LD(r.f1, loc.addr(8, r.r13))
            mc.ADBR(r.f0, r.f1)
            mc.LG(r.r11, loc.addr(16, r.r13))
            mc.STD(r.f0, loc.addr(0, r.r11))
        self.a.jmpto(r.r14)
        run_asm(self.a)
        assert isclose(mem[0], res)
def _emit_threadlocalref_get(self, op, arglocs, regalloc):
    """Emit a read of a thread-local field into ``resloc``.

    The field offset is the integer constant in argument 1 of ``op``;
    size and signedness of the read come from the call descr.
    """
    [resloc] = arglocs
    field_ofs = op.getarg(1).getint()   # getarg(0) == 'threadlocalref_get'
    descr = op.getdescr()
    nbytes = descr.get_result_size()
    signed = descr.is_result_signed()
    #
    # This loads the stack location THREADLOCAL_OFS into a
    # register, and then read the word at the given offset.
    # It is only supported if 'translate_support_code' is
    # true; otherwise, the execute_token() was done with a
    # dummy value for the stack location THREADLOCAL_OFS
    #
    assert self.cpu.translate_support_code
    assert resloc.is_reg()
    assert check_imm_value(field_ofs)
    threadlocal_slot = l.addr(THREADLOCAL_ADDR_OFFSET, r.SP)
    self.mc.LG(resloc, threadlocal_slot)
    # resloc serves as base register and as destination
    self._memory_read(resloc, l.addr(field_ofs, resloc), nbytes, signed)
def _emit_threadlocalref_get(self, op, arglocs, regalloc):
    """Emit a read of a thread-local field into ``resloc``.

    The field offset is the integer constant in argument 1 of ``op``;
    size and signedness of the read come from the call descr.
    """
    [resloc] = arglocs
    field_ofs = op.getarg(1).getint()   # getarg(0) == 'threadlocalref_get'
    descr = op.getdescr()
    nbytes = descr.get_result_size()
    signed = descr.is_result_signed()
    #
    # This loads the stack location THREADLOCAL_OFS into a
    # register, and then read the word at the given offset.
    # It is only supported if 'translate_support_code' is
    # true; otherwise, the execute_token() was done with a
    # dummy value for the stack location THREADLOCAL_OFS
    #
    assert self.cpu.translate_support_code
    assert resloc.is_reg()
    assert check_imm_value(field_ofs)
    threadlocal_slot = l.addr(THREADLOCAL_ADDR_OFFSET, r.SP)
    self.mc.LG(resloc, threadlocal_slot)
    # resloc serves as base register and as destination
    self._memory_read(resloc, l.addr(field_ofs, resloc), nbytes, signed)
def _store_force_index(self, guard_op):
    """Store the guard's fail descr into the frame field 'jf_force_descr'.

    ``guard_op`` must be a GUARD_NOT_FORCED or GUARD_NOT_FORCED_2.
    """
    opnum = guard_op.getopnum()
    assert (opnum == rop.GUARD_NOT_FORCED or
            opnum == rop.GUARD_NOT_FORCED_2)
    descr = guard_op.getdescr()
    field_ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
    #
    gcref_index = self.get_gcref_from_faildescr(descr)
    self.load_gcref_into(r.SCRATCH, gcref_index)
    self.mc.STG(r.SCRATCH, l.addr(field_ofs, r.SPP))
def test_uint_rshift(self):
    """SRLG of r3 (18) by 18 into r2 must return 0."""
    asm = self.a
    mc = asm.mc
    mc.XGR(r.r4, r.r4)
    mc.LGFI(r.r5, loc.imm(63))
    mc.NGR(r.r4, r.r5)
    mc.LGFI(r.r3, loc.imm(18))
    mc.LGFI(r.r2, loc.imm(-1))
    mc.SRLG(r.r2, r.r3, loc.addr(18))
    asm.jmpto(r.r14)
    assert run_asm(asm) == 0
def emit_vec_expand_i(self, op, arglocs, regalloc):
    """Emit code that replicates one integer into every element of resloc.

    A core-register source is inserted at element 0 (VLVG) and then
    replicated (VREP); any other source location is handed to VLREP.
    """
    assert isinstance(op, VectorOp)
    resloc, loc0 = arglocs
    itemsize = op.bytesize
    mc = self.mc
    if not loc0.is_core_reg():
        mc.VLREP(resloc, loc0, l.itemsize_to_mask(itemsize))
        return
    mc.VLVG(resloc, loc0, l.addr(0), l.itemsize_to_mask(itemsize))
    mc.VREP(resloc, resloc, l.imm0, l.itemsize_to_mask(itemsize))
def dummy_argument(arg):
    """ NOT_RPYTHON

    Return a placeholder operand for the argument-type code ``arg``.

    The codes 'r', 'r/m', 'm', 'f', '-', 'eo' and 'v', and every code
    starting with 'i', 'u' or 'h', map to the integer 0.  Anything
    else maps to ``loc.addr(0)``.
    """
    if arg in ("r", "r/m", "m", "f", "-", "eo", "v"):
        return 0
    # one tuple-based prefix test instead of three separate branches
    if arg.startswith(("i", "u", "h")):
        return 0
    return loc.addr(0)
def _store_force_index(self, guard_op):
    """Store the guard's fail descr into the frame field 'jf_force_descr'.

    ``guard_op`` must be a GUARD_NOT_FORCED or GUARD_NOT_FORCED_2.
    """
    opnum = guard_op.getopnum()
    assert (opnum == rop.GUARD_NOT_FORCED or
            opnum == rop.GUARD_NOT_FORCED_2)
    descr = guard_op.getdescr()
    field_ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
    #
    gcref_index = self.get_gcref_from_faildescr(descr)
    self.load_gcref_into(r.SCRATCH, gcref_index)
    self.mc.STG(r.SCRATCH, l.addr(field_ofs, r.SPP))
def test_uint_rshift(self):
    """SRLG of r3 (18) by 18 into r2 must return 0."""
    asm = self.a
    mc = asm.mc
    mc.XGR(r.r4, r.r4)
    mc.LGFI(r.r5, loc.imm(63))
    mc.NGR(r.r4, r.r5)
    mc.LGFI(r.r3, loc.imm(18))
    mc.LGFI(r.r2, loc.imm(-1))
    mc.SRLG(r.r2, r.r3, loc.addr(18))
    asm.jmpto(r.r14)
    assert run_asm(asm) == 0
def emit_vec_expand_i(self, op, arglocs, regalloc):
    """Emit code that replicates one integer into every element of resloc.

    A core-register source is inserted at element 0 (VLVG) and then
    replicated (VREP); any other source location is handed to VLREP.
    """
    assert isinstance(op, VectorOp)
    resloc, loc0 = arglocs
    itemsize = op.bytesize
    mc = self.mc
    if not loc0.is_core_reg():
        mc.VLREP(resloc, loc0, l.itemsize_to_mask(itemsize))
        return
    mc.VLVG(resloc, loc0, l.addr(0), l.itemsize_to_mask(itemsize))
    mc.VREP(resloc, resloc, l.imm0, l.itemsize_to_mask(itemsize))
def dummy_argument(arg):
    """ NOT_RPYTHON

    Return a placeholder operand for the argument-type code ``arg``.

    The codes 'r', 'r/m', 'm', 'f', '-', 'eo' and 'v', and every code
    starting with 'i', 'u' or 'h', map to the integer 0.  Anything
    else maps to ``loc.addr(0)``.
    """
    if arg in ('r', 'r/m', 'm', 'f', '-', 'eo', 'v'):
        return 0
    # one tuple-based prefix test instead of three separate branches
    if arg.startswith(('i', 'u', 'h')):
        return 0
    return loc.addr(0)
def emit_guard_exception(self, op, arglocs, regalloc):
    """Emit a guard that the pending exception type equals ``arglocs[0]``.

    After the guard, the exception value is loaded into ``arglocs[1]``
    (if given) and both the exception value and exception type slots
    are overwritten with zero.  The remaining arglocs are the failargs.
    """
    expected_loc, resloc = arglocs[:2]
    failargs = arglocs[2:]

    mc = self.mc
    # SCRATCH holds the address of the exception value slot
    mc.load_imm(r.SCRATCH, self.cpu.pos_exc_value())
    # distance from the value slot to the exception type slot
    type_delta = self.cpu.pos_exception() - self.cpu.pos_exc_value()
    assert check_imm_value(type_delta)
    type_slot = l.addr(type_delta, r.SCRATCH)

    mc.LG(r.SCRATCH2, type_slot)
    mc.cmp_op(r.SCRATCH2, expected_loc)
    self.guard_success_cc = c.EQ
    self._emit_guard(op, failargs)

    if resloc:
        mc.load(resloc, r.SCRATCH, 0)
    # clear both slots
    mc.LGHI(r.SCRATCH2, l.imm(0))
    mc.STG(r.SCRATCH2, l.addr(0, r.SCRATCH))
    mc.STG(r.SCRATCH2, l.addr(type_delta, r.SCRATCH))
def test_float(self):
    """Convert the inline literal -15.0 to an integer with CGDBR."""
    mc = self.mc
    with ActivationRecordCtx(self):
        with self.label('lit'):
            mc.BRAS(r.r13, loc.imm(0))
            mc.write(BFL(-15.0))
        self.jump_here(self.mc.BRAS, 'lit')
        mc.LD(r.f0, loc.addr(0, r.r13))
        mc.CGDBR(r.r2, msk.RND_CURMODE, r.f0)
    self.a.jmpto(r.r14)
    assert run_asm(self.a) == -15
def test_float(self):
    """Convert the inline literal -15.0 to an integer with CGDBR."""
    mc = self.mc
    with ActivationRecordCtx(self):
        with self.label('lit'):
            mc.BRAS(r.r13, loc.imm(0))
            mc.write(BFL(-15.0))
        self.jump_here(self.mc.BRAS, 'lit')
        mc.LD(r.f0, loc.addr(0, r.r13))
        mc.CGDBR(r.r2, msk.RND_CURMODE, r.f0)
    self.a.jmpto(r.r14)
    assert run_asm(self.a) == -15
def emit_guard_exception(self, op, arglocs, regalloc):
    """Emit a guard that the pending exception type equals ``arglocs[0]``.

    After the guard, the exception value is loaded into ``arglocs[1]``
    (if given) and both the exception value and exception type slots
    are overwritten with zero.  The remaining arglocs are the failargs.
    """
    expected_loc, resloc = arglocs[:2]
    failargs = arglocs[2:]

    mc = self.mc
    # SCRATCH holds the address of the exception value slot
    mc.load_imm(r.SCRATCH, self.cpu.pos_exc_value())
    # distance from the value slot to the exception type slot
    type_delta = self.cpu.pos_exception() - self.cpu.pos_exc_value()
    assert check_imm_value(type_delta)
    type_slot = l.addr(type_delta, r.SCRATCH)

    mc.LG(r.SCRATCH2, type_slot)
    mc.cmp_op(r.SCRATCH2, expected_loc)
    self.guard_success_cc = c.EQ
    self._emit_guard(op, failargs)

    if resloc:
        mc.load(resloc, r.SCRATCH, 0)
    # clear both slots
    mc.LGHI(r.SCRATCH2, l.imm(0))
    mc.STG(r.SCRATCH2, l.addr(0, r.SCRATCH))
    mc.STG(r.SCRATCH2, l.addr(type_delta, r.SCRATCH))
def test_facility(self):
    """Run STFLE and check that facility bit 18 is reported as set.

    The two stored words are rendered as 64-character bit strings so
    that string index i is bit i counted from the most significant
    bit (assumes rffi.ULONG is 64 bits wide here -- TODO confirm).
    """
    adr = self.a.datablockwrapper.malloc_aligned(16, 16)
    self.a.mc.load_imm(r.r2, adr)
    self.a.mc.STFLE(loc.addr(0, r.r2))
    self.a.mc.BCR(con.ANY, r.r14)
    run_asm(self.a)
    fac_data = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr)
    # bin() drops leading zero bits; pad back to 64 digits, otherwise
    # the index below is shifted whenever the top bit(s) are clear
    f64 = bin(fac_data[0])[2:].zfill(64)
    s64 = bin(fac_data[1])[2:].zfill(64)
    print(f64)
    print(s64)
    assert f64[18] == '1'  # long displacement facility
def _call_assembler_check_descr(self, value, tmploc):
    """Emit a compare of the frame's 'jf_descr' field against ``value``.

    Returns the code position of the placeholder (a trap plus four
    zero bytes) that is patched later.  ``tmploc`` is not used.
    """
    mc = self.mc
    descr_ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
    mc.LG(r.SCRATCH, l.addr(descr_ofs, r.r2))
    if not check_imm_value(value):
        # value does not fit an immediate: compare register to register
        mc.load_imm(r.SCRATCH2, value)
        mc.cmp_op(r.SCRATCH, r.SCRATCH2, imm=False)
    else:
        mc.cmp_op(r.SCRATCH, l.imm(value), imm=True)
    patch_pos = mc.currpos()
    mc.trap()               # patched later
    mc.write('\x00' * 4)    # patched later
    return patch_pos
def test_facility(self):
    """Run STFLE and check that facility bit 18 is reported as set.

    The two stored words are rendered as 64-character bit strings so
    that string index i is bit i counted from the most significant
    bit (assumes rffi.ULONG is 64 bits wide here -- TODO confirm).
    """
    adr = self.a.datablockwrapper.malloc_aligned(16, 16)
    self.a.mc.load_imm(r.r2, adr)
    self.a.mc.STFLE(loc.addr(0, r.r2))
    self.a.mc.BCR(con.ANY, r.r14)
    run_asm(self.a)
    fac_data = rffi.cast(rffi.CArrayPtr(rffi.ULONG), adr)
    # bin() drops leading zero bits; pad back to 64 digits, otherwise
    # the index below is shifted whenever the top bit(s) are clear
    f64 = bin(fac_data[0])[2:].zfill(64)
    s64 = bin(fac_data[1])[2:].zfill(64)
    print(f64)
    print(s64)
    assert f64[18] == '1'  # long displacement facility
def _call_assembler_check_descr(self, value, tmploc):
    """Emit a compare of the frame's 'jf_descr' field against ``value``.

    Returns the code position of the placeholder (a trap plus four
    zero bytes) that is patched later.  ``tmploc`` is not used.
    """
    mc = self.mc
    descr_ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
    mc.LG(r.SCRATCH, l.addr(descr_ofs, r.r2))
    if not check_imm_value(value):
        # value does not fit an immediate: compare register to register
        mc.load_imm(r.SCRATCH2, value)
        mc.cmp_op(r.SCRATCH, r.SCRATCH2, imm=False)
    else:
        mc.cmp_op(r.SCRATCH, l.imm(value), imm=True)
    patch_pos = mc.currpos()
    mc.trap()               # patched later
    mc.write('\x00' * 4)    # patched later
    return patch_pos
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    """Emit the code run before a call that releases the GIL.

    Saves r8-r13 on the stack, remembers the shadowstack top (when a
    shadowstack gcrootmap is in use) and stores 0 at the address
    ``fastgil``.  Only valid when ``self.is_call_release_gil`` is set.
    """
    assert self.is_call_release_gil
    RSHADOWOLD = self.RSHADOWOLD
    RSHADOWPTR = self.RSHADOWPTR
    RFASTGILPTR = self.RFASTGILPTR
    #
    # save r8-r13 in the frame, 7 words below STD_FRAME_SIZE_IN_BYTES
    pos = STD_FRAME_SIZE_IN_BYTES - 7 * WORD
    self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP))
    #
    # Save this thread's shadowstack pointer into r8, for later comparison
    # NOTE(review): the code writes RSHADOWOLD; presumably that is r8 -- confirm
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        if gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.load_imm(RSHADOWPTR, rst)
            self.mc.load(RSHADOWOLD, RSHADOWPTR, 0)
    #
    # change 'rpy_fastgil' to 0 (it should be non-zero right now)
    self.mc.load_imm(RFASTGILPTR, fastgil)
    self.mc.XGR(r.SCRATCH, r.SCRATCH)
    # zarch is sequentially consistent
    self.mc.STG(r.SCRATCH, l.addr(0, RFASTGILPTR))
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    """Emit the code run before a call that releases the GIL.

    Saves r8-r13 on the stack, remembers the shadowstack top (when a
    shadowstack gcrootmap is in use) and stores 0 at the address
    ``fastgil``.  Only valid when ``self.is_call_release_gil`` is set.
    """
    assert self.is_call_release_gil
    RSHADOWOLD = self.RSHADOWOLD
    RSHADOWPTR = self.RSHADOWPTR
    RFASTGILPTR = self.RFASTGILPTR
    #
    # save r8-r13 in the frame, CALL_RELEASE_GIL_STACK_OFF bytes below
    # STD_FRAME_SIZE_IN_BYTES
    pos = STD_FRAME_SIZE_IN_BYTES - CALL_RELEASE_GIL_STACK_OFF
    self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP))
    #
    # Save this thread's shadowstack pointer into r8, for later comparison
    # NOTE(review): the code writes RSHADOWOLD; presumably that is r8 -- confirm
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        if gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.load_imm(RSHADOWPTR, rst)
            self.mc.load(RSHADOWOLD, RSHADOWPTR, 0)
    #
    # change 'rpy_fastgil' to 0 (it should be non-zero right now)
    self.mc.load_imm(RFASTGILPTR, fastgil)
    self.mc.XGR(r.SCRATCH, r.SCRATCH)
    # zarch is sequentially consistent
    self.mc.STG(r.SCRATCH, l.addr(0, RFASTGILPTR))