Example #1
        def INSN(self, loc1, loc2):
            code1 = loc1.location_code()
            code2 = loc2.location_code()

            # You can pass in the scratch register as a location, but you
            # must be careful not to combine it with location types that
            # might need to use the scratch register themselves.
            if loc2 is X86_64_SCRATCH_REG:
                if code1 == 'j':
                    assert (name.startswith("MOV")
                            and rx86.fits_in_32bits(loc1.value_j()))
            if loc1 is X86_64_SCRATCH_REG and not name.startswith("MOV"):
                assert code2 not in ('j', 'i')

            for possible_code2 in unrolling_location_codes:
                if not has_implementation_for('?', possible_code2):
                    continue
                if code2 == possible_code2:
                    val2 = getattr(loc2, "value_" + possible_code2)()
                    #
                    # Fake out certain operations for x86_64
                    if self.WORD == 8 and possible_code2 == 'i' and not rx86.fits_in_32bits(
                            val2):
                        insn_with_64_bit_immediate(self, loc1, loc2)
                        return
                    #
                    # Regular case
                    for possible_code1 in unrolling_location_codes:
                        if not has_implementation_for(possible_code1,
                                                      possible_code2):
                            continue
                        if code1 == possible_code1:
                            val1 = getattr(loc1, "value_" + possible_code1)()
                            # More faking out of certain operations for x86_64
                            fits32 = rx86.fits_in_32bits
                            if possible_code1 == 'j' and not fits32(val1):
                                val1 = self._addr_as_reg_offset(val1)
                                invoke(self, "m" + possible_code2, val1, val2)
                                return
                            if possible_code2 == 'j' and not fits32(val2):
                                val2 = self._addr_as_reg_offset(val2)
                                invoke(self, possible_code1 + "m", val1, val2)
                                return
                            if possible_code1 == 'm' and not fits32(val1[1]):
                                val1 = self._fix_static_offset_64_m(val1)
                            if possible_code2 == 'm' and not fits32(val2[1]):
                                val2 = self._fix_static_offset_64_m(val2)
                            if possible_code1 == 'a' and not fits32(val1[3]):
                                val1 = self._fix_static_offset_64_a(val1)
                            if possible_code2 == 'a' and not fits32(val2[3]):
                                val2 = self._fix_static_offset_64_a(val2)
                            invoke(self, possible_code1 + possible_code2, val1,
                                   val2)
                            return
            _missing_binary_insn(name, code1, code2)
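
For reference, rx86.fits_in_32bits used throughout these examples is essentially a signed 32-bit range check; a minimal standalone sketch (the helper name below is made up, the real one lives in rpython.jit.backend.x86.rx86):

def fits_in_32bits_sketch(value):
    # True if 'value' can be encoded as a sign-extended 32-bit
    # immediate or displacement on x86-64.
    return -2147483648 <= value <= 2147483647

assert fits_in_32bits_sketch(2**31 - 1)
assert fits_in_32bits_sketch(-2**31)
assert not fits_in_32bits_sketch(2**31)
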
Example #2
        def INSN(self, loc1, loc2):
            code1 = loc1.location_code()
            code2 = loc2.location_code()

            # You can pass in the scratch register as a location, but you
            # must be careful not to combine it with location types that
            # might need to use the scratch register themselves.
            if loc2 is X86_64_SCRATCH_REG:
                if code1 == 'j':
                    assert (name.startswith("MOV") and
                            rx86.fits_in_32bits(loc1.value_j()))
            if loc1 is X86_64_SCRATCH_REG and not name.startswith("MOV"):
                assert code2 not in ('j', 'i')

            for possible_code2 in unrolling_location_codes:
                if not has_implementation_for('?', possible_code2):
                    continue
                if code2 == possible_code2:
                    val2 = getattr(loc2, "value_" + possible_code2)()
                    #
                    # Fake out certain operations for x86_64
                    if self.WORD == 8 and possible_code2 == 'i' and not rx86.fits_in_32bits(val2):
                        insn_with_64_bit_immediate(self, loc1, loc2)
                        return
                    #
                    # Regular case
                    for possible_code1 in unrolling_location_codes:
                        if not has_implementation_for(possible_code1,
                                                      possible_code2):
                            continue
                        if code1 == possible_code1:
                            val1 = getattr(loc1, "value_" + possible_code1)()
                            # More faking out of certain operations for x86_64
                            fits32 = rx86.fits_in_32bits
                            if possible_code1 == 'j' and not fits32(val1):
                                val1 = self._addr_as_reg_offset(val1)
                                invoke(self, "m" + possible_code2, val1, val2)
                                return
                            if possible_code2 == 'j' and not fits32(val2):
                                val2 = self._addr_as_reg_offset(val2)
                                invoke(self, possible_code1 + "m", val1, val2)
                                return
                            if possible_code1 == 'm' and not fits32(val1[1]):
                                val1 = self._fix_static_offset_64_m(val1)
                            if possible_code2 == 'm' and not fits32(val2[1]):
                                val2 = self._fix_static_offset_64_m(val2)
                            if possible_code1 == 'a' and not fits32(val1[3]):
                                val1 = self._fix_static_offset_64_a(val1)
                            if possible_code2 == 'a' and not fits32(val2[3]):
                                val2 = self._fix_static_offset_64_a(val2)
                            invoke(self, possible_code1 + possible_code2, val1, val2)
                            return
            _missing_binary_insn(name, code1, code2)
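
The dispatch in Examples #1 and #2 builds the name of the concrete encoder from the two location codes and calls it via getattr. A simplified, purely illustrative sketch of that pattern (the class and method names here are hypothetical, not part of the real backend):

class TinyAssembler(object):
    # Hypothetical encoders standing in for the generated ADD_rr, ADD_ri, ...
    def ADD_rr(self, val1, val2):
        print("ADD_rr %r, %r" % (val1, val2))

    def ADD_ri(self, val1, val2):
        print("ADD_ri %r, %r" % (val1, val2))

    def ADD(self, code1, val1, code2, val2):
        # Pick the encoder from the pair of location codes, just like the
        # generated INSN wrappers do.
        getattr(self, "ADD_" + code1 + code2)(val1, val2)

TinyAssembler().ADD('r', 0, 'i', 42)    # prints: ADD_ri 0, 42
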
Example #3
def test_follow_jump_instructions_32():
    buf = lltype.malloc(rffi.CCHARP.TO, 80, flavor='raw')
    raw = rffi.cast(lltype.Signed, buf)
    if not fits_in_32bits(raw):
        lltype.free(buf, flavor='raw')
        py.test.skip("not testable")
    mc = Fake32CodeBlockWrapper(); mc.WORD = 4; mc.relocations = []
    mc.RET()
    mc.copy_to_raw_memory(raw)
    mc = Fake32CodeBlockWrapper(); mc.WORD = 4; mc.relocations = []
    assert follow_jump(raw) == raw
    mc.JMP(imm(raw))
    mc.copy_to_raw_memory(raw + 20)
    assert buf[20] == '\xE9'    # JMP
    assert buf[21] == '\xE7'    #     -25
    assert buf[22] == '\xFF'
    assert buf[23] == '\xFF'
    assert buf[24] == '\xFF'
    mc = Fake32CodeBlockWrapper(); mc.WORD = 4; mc.relocations = []
    assert follow_jump(raw + 20) == raw
    mc.JMP(imm(raw))
    mc.copy_to_raw_memory(raw + 40)
    assert buf[40] == '\xE9'    # JMP
    assert buf[41] == '\xD3'    #     -45
    assert buf[42] == '\xFF'
    assert buf[43] == '\xFF'
    assert buf[44] == '\xFF'
    assert follow_jump(raw + 40) == raw
    lltype.free(buf, flavor='raw')
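
The expected bytes in this test follow from the x86 near-jump encoding: opcode 0xE9 followed by a little-endian 32-bit displacement measured from the end of the 5-byte instruction. A small sketch that reproduces the two displacements noted in the comments (-25 and -45):

import struct

def jmp_rel32_bytes(jmp_offset, target_offset):
    # E9 <rel32>: the displacement is relative to the next instruction,
    # i.e. to jmp_offset + 5, and is stored little-endian.
    rel32 = target_offset - (jmp_offset + 5)
    return b'\xE9' + struct.pack('<i', rel32)

assert jmp_rel32_bytes(20, 0) == b'\xE9\xE7\xFF\xFF\xFF'   # rel32 == -25
assert jmp_rel32_bytes(40, 0) == b'\xE9\xD3\xFF\xFF\xFF'   # rel32 == -45
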
Example #4
 def insn_with_64_bit_immediate(self, loc1, loc2):
     # These are the worst cases:
     val2 = loc2.value_i()
     if name == 'MOV' and isinstance(loc1, RegLoc):
         self.MOV_ri(loc1.value, val2)
         return
     code1 = loc1.location_code()
     if code1 == 'j':
         checkvalue = loc1.value_j()
     elif code1 == 'm':
         checkvalue = loc1.value_m()[1]
     elif code1 == 'a':
         checkvalue = loc1.value_a()[3]
     else:
         checkvalue = 0
     if not rx86.fits_in_32bits(checkvalue):
         # INSN_ji, and both operands are 64-bit; or INSN_mi or INSN_ai
         # and the constant offset in the address is 64-bit.
         # Hopefully this doesn't happen too often
         freereg = loc1.find_unused_reg()
         self.PUSH_r(freereg.value)
         self.MOV_ri(freereg.value, val2)
         INSN(self, loc1, freereg)
         self.POP_r(freereg.value)
     else:
         # In this case, we should not need the scratch register beyond this point.
         self._load_scratch(val2)
         INSN(self, loc1, X86_64_SCRATCH_REG)
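
The reason this helper exists: essentially only MOV reg, imm64 accepts a full 64-bit immediate on x86-64; other instructions take sign-extended 32-bit immediates at most, so a constant such as 3**33 (used in the test_bug_setfield_64bit examples further down) has to be materialized in a register first. A quick plain-Python check of that size relation:

big = 3 ** 33                        # 5559060566555523
assert not (-2**31 <= big < 2**31)   # too large for an imm32 field
assert big < 2**63                   # but fine as a 64-bit immediate
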
Example #5
 def _load_scratch(self, value):
     if self._scratch_register_value != -1:
         if self._scratch_register_value == value:
             #print '_load_scratch(%x) [REUSED]' % (value,)
             return
         offset = r_uint(value) - r_uint(self._scratch_register_value)
         offset = intmask(offset)
         if rx86.fits_in_32bits(offset):
             #print '_load_scratch(%x) [LEA r11+%d]' % (value, offset)
             #global COUNT_
             #try:
             #    COUNT_ += 1
             #except NameError:
             #    COUNT_ = 1
             #if COUNT_ % 182 == 0:
             #    import pdb;pdb.set_trace()
             self.LEA_rm(X86_64_SCRATCH_REG.value,
                         (X86_64_SCRATCH_REG.value, offset))
             self._scratch_register_value = value
             return
         #print '_load_scratch(%x) [too far]' % (value,)
     #else:
     #    print '_load_scratch(%x) [new]' % (value,)
     self._scratch_register_value = value
     self.MOV_ri(X86_64_SCRATCH_REG.value, value)
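
The r_uint()/intmask() pair above computes the difference between two word-sized values with wraparound and then reinterprets it as a signed offset, which avoids overflow errors in RPython. In plain Python (with unbounded ints) the same computation can be sketched as:

def signed_delta(new, old, word_bits=64):
    # (new - old) modulo 2**word_bits, reinterpreted as a signed integer;
    # this mirrors intmask(r_uint(new) - r_uint(old)).
    mask = (1 << word_bits) - 1
    delta = (new - old) & mask
    if delta >= 1 << (word_bits - 1):
        delta -= 1 << word_bits
    return delta

assert signed_delta(0x1000, 0x2000) == -0x1000
assert signed_delta(2**63 - 1, -(2**63)) == -1    # wraps around cleanly
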
Example #6
def test_follow_jump_instructions_32():
    buf = lltype.malloc(rffi.CCHARP.TO, 80, flavor='raw')
    raw = rffi.cast(lltype.Signed, buf)
    if not fits_in_32bits(raw):
        lltype.free(buf, flavor='raw')
        py.test.skip("not testable")
    mc = Fake32CodeBlockWrapper()
    mc.WORD = 4
    mc.relocations = []
    mc.RET()
    mc.copy_to_raw_memory(raw)
    mc = Fake32CodeBlockWrapper()
    mc.WORD = 4
    mc.relocations = []
    assert follow_jump(raw) == raw
    mc.JMP(imm(raw))
    mc.copy_to_raw_memory(raw + 20)
    assert buf[20] == '\xE9'  # JMP
    assert buf[21] == '\xE7'  #     -25
    assert buf[22] == '\xFF'
    assert buf[23] == '\xFF'
    assert buf[24] == '\xFF'
    mc = Fake32CodeBlockWrapper()
    mc.WORD = 4
    mc.relocations = []
    assert follow_jump(raw + 20) == raw
    mc.JMP(imm(raw))
    mc.copy_to_raw_memory(raw + 40)
    assert buf[40] == '\xE9'  # JMP
    assert buf[41] == '\xD3'  #     -45
    assert buf[42] == '\xFF'
    assert buf[43] == '\xFF'
    assert buf[44] == '\xFF'
    assert follow_jump(raw + 40) == raw
    lltype.free(buf, flavor='raw')
Example #7
 def INSN(self, loc):
     code = loc.location_code()
     for possible_code in unrolling_location_codes:
         if code == possible_code:
             val = getattr(loc, "value_" + possible_code)()
             if self.WORD == 8 and possible_code == 'i' and not rx86.fits_in_32bits(val):
                 self._load_scratch(val)
                 _rx86_getattr(self, name + "_r")(X86_64_SCRATCH_REG.value)
             else:
                 methname = name + "_" + possible_code
                 _rx86_getattr(self, methname)(val)
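
The self.WORD == 8 branch is the single-operand version of the same trick: on a 64-bit build an immediate that does not fit in 32 bits is rerouted through the scratch register and the register form of the instruction is used. A tiny sketch of just that decision (names are illustrative only):

WORD = 8    # assume a 64-bit build for this sketch

def pick_code(code, value):
    # A too-large immediate is demoted to the register form 'r';
    # everything else keeps its original location code.
    if WORD == 8 and code == 'i' and not (-2**31 <= value < 2**31):
        return 'r'
    return code

assert pick_code('i', 42) == 'i'
assert pick_code('i', 2**40) == 'r'
assert pick_code('r', 7) == 'r'
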
Example #8
 def test_bug_setfield_64bit(self):
     if WORD == 4:
         py.test.skip("only for 64 bits")
     TP = lltype.GcStruct('S', ('i', lltype.Signed))
     ofsi = self.cpu.fielddescrof(TP, 'i')
     for i in range(500):
         p = lltype.malloc(TP)
         addr = rffi.cast(lltype.Signed, p)
         if fits_in_32bits(addr):
             break  # fitting in 32 bits, good
     else:
         py.test.skip("cannot get a 32-bit pointer")
     res = ConstPtr(rffi.cast(llmemory.GCREF, addr))
     self.execute_operation(rop.SETFIELD_RAW, [res, ConstInt(3**33)],
                            'void', ofsi)
     assert p.i == 3**33
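
The for ... else construct in this test is easy to misread: the else block runs only when the loop finished without hitting break, which is what triggers the skip when none of the 500 allocations produced a 32-bit address. A minimal illustration:

for attempt in range(500):
    if attempt == 3:          # pretend the 4th allocation fit in 32 bits
        break
else:
    raise AssertionError("only reached if the loop never breaks")
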
Example #9
 def test_bug_setfield_64bit(self):
     if WORD == 4:
         py.test.skip("only for 64 bits")
     TP = lltype.GcStruct('S', ('i', lltype.Signed))
     ofsi = self.cpu.fielddescrof(TP, 'i')
     for i in range(500):
         p = lltype.malloc(TP)
         addr = rffi.cast(lltype.Signed, p)
         if fits_in_32bits(addr):
             break    # fitting in 32 bits, good
     else:
         py.test.skip("cannot get a 32-bit pointer")
     res = ConstPtr(rffi.cast(llmemory.GCREF, addr))
     self.execute_operation(rop.SETFIELD_RAW, [res, ConstInt(3**33)],
                            'void', ofsi)
     assert p.i == 3**33
Example #10
    def _addr_as_reg_offset(self, addr):
        # Encodes a (64-bit) address as an offset from the scratch register.
        # If we are within a "reuse_scratch_register" block, we remember the
        # last value we loaded to the scratch register and encode the address
        # as an offset from that if we can
        if self._scratch_register_known:
            offset = addr - self._scratch_register_value
            if rx86.fits_in_32bits(offset):
                return (X86_64_SCRATCH_REG.value, offset)
            # else: fall through

        if self._reuse_scratch_register:
            self._scratch_register_known = True
            self._scratch_register_value = addr

        self.MOV_ri(X86_64_SCRATCH_REG.value, addr)
        return (X86_64_SCRATCH_REG.value, 0)
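
The effect of this helper: once some base address is sitting in the scratch register, any address within a signed 32-bit displacement of it can be encoded as a (register, offset) pair without another MOV. A sketch with a hypothetical base value:

SCRATCH_BASE = 0x7f3a00000000        # hypothetical value already in r11

def encode_as_reg_offset(addr):
    disp = addr - SCRATCH_BASE
    if -2**31 <= disp < 2**31:
        return ('r11', disp)         # reachable as r11 + disp32
    return None                      # needs a fresh MOV r11, addr first

assert encode_as_reg_offset(0x7f3a00001000) == ('r11', 0x1000)
assert encode_as_reg_offset(0x10000000) is None
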
Example #11
 def _addr_as_reg_offset(self, addr):
     # Encodes a (64-bit) address as an offset from the scratch register.
     # If we are within a "reuse_scratch_register" block, we remember the
     # last value we loaded to the scratch register and encode the address
     # as an offset from that if we can
     if self._scratch_register_value != -1:
         offset = r_uint(addr) - r_uint(self._scratch_register_value)
         offset = intmask(offset)
         if rx86.fits_in_32bits(offset):
             #print '_addr_as_reg_offset(%x) [REUSED r11+%d]' % (
             #    addr, offset)
             return (X86_64_SCRATCH_REG.value, offset)
         #print '_addr_as_reg_offset(%x) [too far]' % (addr,)
         # else: fall through
     #else:
     #    print '_addr_as_reg_offset(%x) [new]' % (addr,)
     self._scratch_register_value = addr
     self.MOV_ri(X86_64_SCRATCH_REG.value, addr)
     return (X86_64_SCRATCH_REG.value, 0)
Example #12
 def move_real_result_and_call_reacqgil_addr(self, fastgil):
     from rpython.jit.backend.x86 import rx86
     #
     # check if we need to call the reacqgil() function or not
     # (to acquire the GIL, remove the asmgcc head from
     # the chained list, etc.)
     mc = self.mc
     restore_edx = False
     if not self.asm._is_asmgcc():
         css = 0
         css_value = imm(0)
         old_value = ecx
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             assert css >= 16
             if self.restype == 'L':  # long long result: eax/edx
                 if not self.result_value_saved_early:
                     mc.MOV_sr(12, edx.value)
                     restore_edx = True
             css_value = edx
             old_value = ecx
         elif IS_X86_64:
             css_value = edi
             old_value = esi
         mc.LEA_rs(css_value.value, css)
     #
     # Use XCHG as an atomic test-and-set-lock.  It also implicitly
     # does a memory barrier.
     mc.MOV(old_value, imm(1))
     if rx86.fits_in_32bits(fastgil):
         mc.XCHG_rj(old_value.value, fastgil)
     else:
         mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
         mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
     mc.CMP(old_value, css_value)
     #
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if bool(gcrootmap) and gcrootmap.is_shadow_stack:
         from rpython.jit.backend.x86.assembler import heap
         #
         # When doing a call_release_gil with shadowstack, there
         # is the risk that the 'rpy_fastgil' was free but the
         # current shadowstack can be the one of a different
         # thread.  So here we check if the shadowstack pointer
         # is still the same as before we released the GIL (saved
         # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
         mc.J_il8(rx86.Conditions['NE'], 0)
         jne_location = mc.get_relative_pos()
         # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
         # state before the XCHG, but the XCHG acquired it by writing 1)
         rst = gcrootmap.get_root_stack_top_addr()
         mc = self.mc
         mc.CMP(ebx, heap(rst))
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
         # revert the rpy_fastgil acquired above, so that the
         # general 'reacqgil_addr' below can acquire it again...
         mc.MOV(heap(fastgil), ecx)
         # patch the JNE above
         offset = mc.get_relative_pos() - jne_location
         assert 0 < offset <= 127
         mc.overwrite(jne_location - 1, chr(offset))
     else:
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
     #
     # Yes, we need to call the reacqgil() function
     if not self.result_value_saved_early:
         self.save_result_value(save_edx=False)
     if self.asm._is_asmgcc():
         if IS_X86_32:
             mc.MOV_sr(4, old_value.value)
             mc.MOV_sr(0, css_value.value)
         # on X86_64, they are already in the right registers
     mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
     if not self.result_value_saved_early:
         self.restore_result_value(save_edx=False)
     #
     # patch the JE above
     offset = mc.get_relative_pos() - je_location
     assert 0 < offset <= 127
     mc.overwrite(je_location - 1, chr(offset))
     #
     if restore_edx:
         mc.MOV_rs(edx.value, 12)  # restore this
     #
     if self.result_value_saved_early:
         self.restore_result_value(save_edx=True)
     #
     if not we_are_translated():  # for testing: now we can access
         mc.SUB(ebp, imm(1))  # ebp again
     #
     # Now that we have reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
         mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
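
The XCHG above is used as an atomic test-and-set on the word 'rpy_fastgil': it writes the new value and hands back the previous one in a single atomic step, and the CMP that follows checks whether the old value allows the fast path. Ignoring the atomicity (which the hardware provides), the semantics can be sketched as:

def test_and_set(memory, addr):
    old, memory[addr] = memory[addr], 1
    return old

fastgil = {0x1000: 0}                        # 0 means "released"
assert test_and_set(fastgil, 0x1000) == 0    # was free: fast path
assert test_and_set(fastgil, 0x1000) == 1    # already held: slow path
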
Example #13
    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.x86 import rx86
        #
        # check if we need to call the reacqgil() function or not
        # (to acquire the GIL)
        mc = self.mc
        restore_edx = False
        #
        # Make sure we can use 'eax' in the sequel for CMPXCHG
        # On 32-bit, we also need to check if restype is 'L' for long long,
        # in which case we need to save eax and edx because they are both
        # used for the return value.
        if self.restype in (INT, 'L') and not self.result_value_saved_early:
            self.save_result_value(save_edx=self.restype == 'L')
            self.result_value_saved_early = True
        #
        # Use LOCK CMPXCHG as a compare-and-swap with memory barrier.
        tlsreg = self.get_tlofs_reg()
        thread_ident_ofs = lltls.get_thread_ident_offset(self.asm.cpu)
        #
        mc.MOV_rm(ecx.value, (tlsreg.value, thread_ident_ofs))
        mc.XOR_rr(eax.value, eax.value)

        if rx86.fits_in_32bits(fastgil):
            mc.LOCK()
            mc.CMPXCHG_jr(fastgil, ecx.value)
        else:
            mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
            mc.LOCK()
            mc.CMPXCHG_mr((X86_64_SCRATCH_REG.value, 0), ecx.value)
        #
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if bool(gcrootmap):
            from rpython.jit.backend.x86.assembler import heap
            assert gcrootmap.is_shadow_stack
            #
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
            mc.J_il(rx86.Conditions['NZ'], 0xfffff)  # patched later
            early_jump_addr = mc.get_relative_pos(break_basic_block=False)
            # ^^^ this jump will go to almost the same place as the
            # ReacqGilSlowPath() computes, but one instruction further,
            # i.e. just after the "MOV(heap(fastgil), 0)".

            rst = gcrootmap.get_root_stack_top_addr()
            mc = self.mc
            mc.CMP(ebx, heap(rst))
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
            sp.early_jump_addr = early_jump_addr
            sp.fastgil = fastgil
        else:
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NZ'])
        sp.callbuilder = self
        sp.set_continue_addr(mc)
        self.asm.pending_slowpaths.append(sp)
        #
        if restore_edx:
            mc.MOV_rs(edx.value, 12)  # restore this
        #
        if self.result_value_saved_early:
            self.restore_result_value(save_edx=True)
        #
        if not we_are_translated():  # for testing: now we can access
            mc.SUB(ebp, imm(1))  # ebp again
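
This newer variant replaces the XCHG with LOCK CMPXCHG: with eax zeroed, the store of ecx (the thread ident) into 'rpy_fastgil' happens only if the word still holds 0, and the NZ condition routes the failing case to the slow path. A plain-Python sketch of the compare-and-swap semantics:

def compare_and_swap(memory, addr, expected, new):
    # Succeeds (and stores) only if the word still holds 'expected'.
    if memory[addr] == expected:
        memory[addr] = new
        return True
    return False

fastgil = {0x1000: 0}
assert compare_and_swap(fastgil, 0x1000, 0, 12345)        # acquired
assert not compare_and_swap(fastgil, 0x1000, 0, 54321)    # already taken
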
Example #14
 def move_real_result_and_call_reacqgil_addr(self, fastgil):
     from rpython.jit.backend.x86 import rx86
     #
     # check if we need to call the reacqgil() function or not
     # (to acquire the GIL, remove the asmgcc head from
     # the chained list, etc.)
     mc = self.mc
     restore_edx = False
     if not self.asm._is_asmgcc():
         css = 0
         css_value = imm(0)
         old_value = ecx
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             assert css >= 16
             if self.restype == 'L':    # long long result: eax/edx
                 mc.MOV_sr(12, edx.value)
                 restore_edx = True
             css_value = edx
             old_value = ecx
         elif IS_X86_64:
             css_value = edi
             old_value = esi
         mc.LEA_rs(css_value.value, css)
     #
     mc.MOV(old_value, imm(1))
     if rx86.fits_in_32bits(fastgil):
         mc.XCHG_rj(old_value.value, fastgil)
     else:
         mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
         mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
     mc.CMP(old_value, css_value)
     mc.J_il8(rx86.Conditions['E'], 0)
     je_location = mc.get_relative_pos()
     #
     # Yes, we need to call the reacqgil() function
     self.save_result_value_reacq()
     if self.asm._is_asmgcc():
         if IS_X86_32:
             mc.MOV_sr(4, old_value.value)
             mc.MOV_sr(0, css_value.value)
         # on X86_64, they are already in the right registers
     mc.CALL(imm(self.asm.reacqgil_addr))
     self.restore_result_value_reacq()
     #
     # patch the JE above
     offset = mc.get_relative_pos() - je_location
     assert 0 < offset <= 127
     mc.overwrite(je_location-1, chr(offset))
     #
     if restore_edx:
         mc.MOV_rs(edx.value, 12)   # restore this
     #
     if not we_are_translated():    # for testing: now we can access
         mc.SUB(ebp, imm(1))        # ebp again
     #
     # Now that we have reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
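
The J_il8(cond, 0) followed later by mc.overwrite(je_location - 1, chr(offset)) is the usual forward-jump patching idiom: emit the short conditional jump with a placeholder displacement, then back-patch the single displacement byte once the target is known, which is why the assert limits the offset to 127. A standalone sketch of the same idea:

code = bytearray(b'\x74\x00')     # JE rel8 with a placeholder displacement
je_location = len(code)           # position right after the jump
code += b'\x90' * 25              # code that may be skipped (NOPs here)
offset = len(code) - je_location
assert 0 < offset <= 127          # must fit in a signed byte
code[je_location - 1] = offset    # back-patch the rel8 field
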
Example #15
 def move_real_result_and_call_reacqgil_addr(self, fastgil):
     from rpython.jit.backend.x86 import rx86
     #
     # check if we need to call the reacqgil() function or not
     # (to acquire the GIL, remove the asmgcc head from
     # the chained list, etc.)
     mc = self.mc
     restore_edx = False
     if not self.asm._is_asmgcc():
         css = 0
         css_value = imm(0)
         old_value = ecx
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             assert css >= 16
             if self.restype == 'L':    # long long result: eax/edx
                 if not self.result_value_saved_early:
                     mc.MOV_sr(12, edx.value)
                     restore_edx = True
             css_value = edx
             old_value = ecx
         elif IS_X86_64:
             css_value = edi
             old_value = esi
         mc.LEA_rs(css_value.value, css)
     #
     # Use XCHG as an atomic test-and-set-lock.  It also implicitly
     # does a memory barrier.
     mc.MOV(old_value, imm(1))
     if rx86.fits_in_32bits(fastgil):
         mc.XCHG_rj(old_value.value, fastgil)
     else:
         mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
         mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
     mc.CMP(old_value, css_value)
     #
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if bool(gcrootmap) and gcrootmap.is_shadow_stack:
         from rpython.jit.backend.x86.assembler import heap
         #
         # When doing a call_release_gil with shadowstack, there
         # is the risk that the 'rpy_fastgil' was free but the
         # current shadowstack can be the one of a different
         # thread.  So here we check if the shadowstack pointer
         # is still the same as before we released the GIL (saved
         # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
         mc.J_il8(rx86.Conditions['NE'], 0)
         jne_location = mc.get_relative_pos()
         # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
         # state before the XCHG, but the XCHG acquired it by writing 1)
         rst = gcrootmap.get_root_stack_top_addr()
         mc = self.mc
         mc.CMP(ebx, heap(rst))
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
         # revert the rpy_fastgil acquired above, so that the
         # general 'reacqgil_addr' below can acquire it again...
         mc.MOV(heap(fastgil), ecx)
         # patch the JNE above
         offset = mc.get_relative_pos() - jne_location
         assert 0 < offset <= 127
         mc.overwrite(jne_location-1, chr(offset))
     else:
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
     #
     # Yes, we need to call the reacqgil() function
     if not self.result_value_saved_early:
         self.save_result_value(save_edx=False)
     if self.asm._is_asmgcc():
         if IS_X86_32:
             mc.MOV_sr(4, old_value.value)
             mc.MOV_sr(0, css_value.value)
         # on X86_64, they are already in the right registers
     mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
     if not self.result_value_saved_early:
         self.restore_result_value(save_edx=False)
     #
     # patch the JE above
     offset = mc.get_relative_pos() - je_location
     assert 0 < offset <= 127
     mc.overwrite(je_location-1, chr(offset))
     #
     if restore_edx:
         mc.MOV_rs(edx.value, 12)   # restore this
     #
     if self.result_value_saved_early:
         self.restore_result_value(save_edx=True)
     #
     if not we_are_translated():    # for testing: now we can access
         mc.SUB(ebp, imm(1))        # ebp again
     #
     # Now that we have reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
Example #16
    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.x86 import rx86
        #
        # check if we need to call the reacqgil() function or not
        # (to acquire the GIL, remove the asmgcc head from
        # the chained list, etc.)
        mc = self.mc
        restore_edx = False
        if not self.asm._is_asmgcc():
            css = 0
            css_value = imm(0)
            old_value = ecx
        else:
            from rpython.memory.gctransform import asmgcroot
            css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
            if IS_X86_32:
                assert css >= 16
                if self.restype == 'L':  # long long result: eax/edx
                    if not self.result_value_saved_early:
                        mc.MOV_sr(12, edx.value)
                        restore_edx = True
                css_value = edx  # note: duplicated in ReacqGilSlowPath
                old_value = ecx  #
            elif IS_X86_64:
                css_value = edi
                old_value = esi
            mc.LEA_rs(css_value.value, css)
        #
        # Use XCHG as an atomic test-and-set-lock.  It also implicitly
        # does a memory barrier.
        mc.MOV(old_value, imm(1))
        if rx86.fits_in_32bits(fastgil):
            mc.XCHG_rj(old_value.value, fastgil)
        else:
            mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
            mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
        mc.CMP(old_value, css_value)
        #
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if bool(gcrootmap) and gcrootmap.is_shadow_stack:
            from rpython.jit.backend.x86.assembler import heap
            #
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
            mc.J_il(rx86.Conditions['NE'], 0xfffff)  # patched later
            early_jump_addr = mc.get_relative_pos(break_basic_block=False)
            # ^^^ this jump will go to almost the same place as the
            # ReacqGilSlowPath() computes, but one instruction farther,
            # i.e. just after the "MOV(heap(fastgil), ecx)".

            # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
            # state before the XCHG, but the XCHG acquired it by writing 1)
            rst = gcrootmap.get_root_stack_top_addr()
            mc = self.mc
            mc.CMP(ebx, heap(rst))
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
            sp.early_jump_addr = early_jump_addr
            sp.fastgil = fastgil
        else:
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.callbuilder = self
        sp.set_continue_addr(mc)
        self.asm.pending_slowpaths.append(sp)
        #
        if restore_edx:
            mc.MOV_rs(edx.value, 12)  # restore this
        #
        if self.result_value_saved_early:
            self.restore_result_value(save_edx=True)
        #
        if not we_are_translated():  # for testing: now we can access
            mc.SUB(ebp, imm(1))  # ebp again
        #
        # Now that we have reacquired the GIL, we can reload a possibly modified ebp
        if self.asm._is_asmgcc():
            # special-case: reload ebp from the css
            from rpython.memory.gctransform import asmgcroot
            index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
            mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
Example #17
        def INSN(self, loc1, loc2):
            code1 = loc1.location_code()
            code2 = loc2.location_code()

            # You cannot pass in the scratch register as a location,
            # except with a MOV instruction.
            if name.startswith('MOV'):
                if loc2 is X86_64_SCRATCH_REG:
                    assert code1 != 'j' and code1 != 'm' and code1 != 'a'
                if loc1 is X86_64_SCRATCH_REG:
                    self.forget_scratch_register()
            elif loc1 is X86_64_SCRATCH_REG or loc2 is X86_64_SCRATCH_REG:
                raise AssertionError("%s with scratch reg specified" % name)

            for possible_code2 in unrolling_location_codes:
                if not has_implementation_for('?', possible_code2):
                    continue
                if code2 == possible_code2:
                    val2 = getattr(loc2, "value_" + possible_code2)()
                    #
                    # Fake out certain operations for x86_64
                    if self.WORD == 8 and possible_code2 == 'i' and not rx86.fits_in_32bits(
                            val2):
                        if insn_with_64_bit_immediate(self, loc1, loc2):
                            return  # done
                        loc2 = X86_64_SCRATCH_REG
                        code2 = 'r'
                        # NB. unrolling_location_codes contains 'r'
                        # after 'i', so that it will be found after
                        # this iteration
                        continue
                    #
                    # Regular case
                    for possible_code1 in unrolling_location_codes:
                        if not has_implementation_for(possible_code1,
                                                      possible_code2):
                            continue
                        if code1 == possible_code1:
                            val1 = getattr(loc1, "value_" + possible_code1)()
                            # More faking out of certain operations for x86_64
                            fits32 = rx86.fits_in_32bits
                            if possible_code1 == 'j' and not fits32(val1):
                                val1 = self._addr_as_reg_offset(val1)
                                invoke(self, "m" + possible_code2, val1, val2)
                                return
                            if possible_code2 == 'j' and not fits32(val2):
                                val2 = self._addr_as_reg_offset(val2)
                                invoke(self, possible_code1 + "m", val1, val2)
                                return
                            if possible_code1 == 'm' and not fits32(val1[1]):
                                val1 = self._fix_static_offset_64_m(val1)
                            if possible_code2 == 'm' and not fits32(val2[1]):
                                val2 = self._fix_static_offset_64_m(val2)
                            if possible_code1 == 'a' and not fits32(val1[3]):
                                val1 = self._fix_static_offset_64_a(val1)
                            if possible_code2 == 'a' and not fits32(val2[3]):
                                val2 = self._fix_static_offset_64_a(val2)
                            invoke(self, possible_code1 + possible_code2, val1,
                                   val2)
                            return
            _missing_binary_insn(name, code1, code2)
Example #18
 def consider_int_sub(self, op, position):
     y = op.getarg(1)
     if isinstance(y, ConstInt) and rx86.fits_in_32bits(-y.value):
         pass  # nothing to be hinted
     else:
         self._consider_binop(op, position)
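
Note that fits_in_32bits(-y.value) and fits_in_32bits(y.value) are not the same test: the sign-extended 32-bit range is asymmetric, so negating INT32_MIN overflows it. A small check (INT32_MIN is just a local constant in this sketch):

INT32_MIN = -2**31

def neg_fits_in_32bits(v):
    return -2**31 <= -v <= 2**31 - 1

assert neg_fits_in_32bits(12345)
assert neg_fits_in_32bits(-(2**31 - 1))
assert not neg_fits_in_32bits(INT32_MIN)   # -INT32_MIN == 2**31 does not fit
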