Example #1
    def test_reuse_scratch_register(self):
        base_addr = intmask(0xFEDCBA9876543210)
        cb = LocationCodeBuilder64()
        cb.MOV(ecx, heap(base_addr))
        cb.MOV(ecx, heap(base_addr + 8))

        expected_instructions = (
            # mov r11, 0xFEDCBA9876543210
            '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
            # mov rcx, [r11]
            '\x49\x8B\x0B' +
            # mov rcx, [r11+8]
            '\x49\x8B\x4B\x08')
        assert cb.getvalue() == expected_instructions
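The 10-byte '\x49\xBB...' prefix is the standard x86-64 encoding of mov r11, imm64. A minimal sketch in plain Python 3 of how those bytes decompose (the helper name is made up for illustration; only the architectural encoding is assumed):

    import struct

    def encode_mov_r11_imm64(imm64):
        # 0x49 = REX.W|REX.B: 64-bit operand size, register extended to r8-r15
        # 0xB8 + 3 = 0xBB:    MOV r64, imm64 with rd = r11's low three bits
        # the immediate is packed unsigned here; the test above goes through
        # intmask() only to keep the constant in RPython's signed range
        return bytes([0x49, 0xB8 + 3]) + struct.pack('<Q', imm64)

    assert encode_mov_r11_imm64(0xFEDCBA9876543210) == \
        b'\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE'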
Example #2
        def generate_body(self, assembler, mc):
            if self.early_jump_addr != 0:
                # This slow-path has two entry points, with two
                # conditional jumps.  We can jump to the regular start
                # of this slow-path with the 2nd conditional jump.  Or,
                # we can jump past the "MOV(heap(fastgil), ecx)"
                # instruction from the 1st conditional jump.
                # This instruction reverts the rpy_fastgil acquired
                # previously, so that the general 'reacqgil_addr'
                # function can acquire it again.  It must only be done
                # if we actually succeeded in acquiring rpy_fastgil.
                from rpython.jit.backend.x86.assembler import heap
                mc.MOV(heap(self.fastgil), ecx)
                offset = mc.get_relative_pos() - self.early_jump_addr
                mc.overwrite32(self.early_jump_addr - 4, offset)
                # scratch register forgotten here, by get_relative_pos()

            # call the reacqgil() function
            cb = self.callbuilder
            if not cb.result_value_saved_early:
                cb.save_result_value(save_edx=False)
            if assembler._is_asmgcc():
                if IS_X86_32:
                    css_value = edx
                    old_value = ecx
                    mc.MOV_sr(4, old_value.value)
                    mc.MOV_sr(0, css_value.value)
                # on X86_64, they are already in the right registers
            mc.CALL(imm(follow_jump(assembler.reacqgil_addr)))
            if not cb.result_value_saved_early:
                cb.restore_result_value(save_edx=False)
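The overwrite32() call patches the 32-bit displacement of the early conditional jump, which was emitted with a dummy target: get_relative_pos() recorded the position just past the jump, so the real offset is written over the 4 bytes preceding it. A minimal sketch of the same idiom, assuming a plain bytearray as the code buffer (none of these names are the real MachineCodeBuilder API):

    import struct

    buf = bytearray()
    buf += b'\x0F\x85\x00\x00\x00\x00'    # JNE rel32 with a dummy displacement
    early_jump_addr = len(buf)            # like mc.get_relative_pos()
    buf += b'\x90' * 7                    # code the early jump must skip over
    offset = len(buf) - early_jump_addr   # same arithmetic as generate_body()
    buf[early_jump_addr - 4:early_jump_addr] = struct.pack('<i', offset)

    assert struct.unpack('<i', bytes(buf[2:6]))[0] == 7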
Example #3
 def call_releasegil_addr_and_move_real_arguments(self, fastgil):
     from rpython.jit.backend.x86.assembler import heap
     assert self.is_call_release_gil
     #
     # Save this thread's shadowstack pointer into 'ebx',
     # for later comparison
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if gcrootmap:
         if gcrootmap.is_shadow_stack:
             rst = gcrootmap.get_root_stack_top_addr()
             self.mc.MOV(ebx, heap(rst))
     #
     if not self.asm._is_asmgcc():
         # shadowstack: change 'rpy_fastgil' to 0 (it should be
         # non-zero right now).
         self.change_extra_stack_depth = False
         # ^^ note that set_extra_stack_depth() in this case is a no-op
         css_value = imm(0)
     else:
         from rpython.memory.gctransform import asmgcroot
         # build a 'css' structure on the stack: 2 words for the linkage,
         # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
         # total size of JIT_USE_WORDS.  This structure is found at
         # [ESP+css].
         css = -self.get_current_esp() + (
             WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
         assert css >= 2 * WORD
         # Save ebp
         index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
         self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
         # Save the "return address": we pretend that it's css
         self.mc.LEA_rs(eax.value, css)  # LEA eax, [css]
         frame_ptr = css + WORD * (2 + asmgcroot.FRAME_PTR)
         self.mc.MOV_sr(frame_ptr, eax.value)  # MOV [css.frame], eax
         # Set up jf_extra_stack_depth to pretend that the return address
         # was at css, and so our stack frame is supposedly shorter by
         # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
         delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
         self.change_extra_stack_depth = True
         self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
         css_value = eax
     #
     # <--here--> would come a memory fence, if the CPU needed one.
     self.mc.MOV(heap(fastgil), css_value)
     #
     if not we_are_translated():  # for testing: we should not access
         self.mc.ADD(ebp, imm(1))  # ebp any more
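A worked-number sketch of the 'css' offset arithmetic above. WORD matches x86-64, but PASS_ON_MY_FRAME, JIT_USE_WORDS and the ESP depth are illustrative values only; the real constants live in the backend and in asmgcroot:

    WORD = 8                      # x86-64 word size
    PASS_ON_MY_FRAME = 12         # illustrative value, not the real constant
    JIT_USE_WORDS = 7             # illustrative value, not the real constant
    current_esp = -2 * WORD       # assume ESP is two words below frame entry

    # offset of the 'css' structure relative to the current ESP
    css = -current_esp + WORD * (PASS_ON_MY_FRAME - JIT_USE_WORDS)
    assert css == 56 and css >= 2 * WORD   # same assert as in the code above

    # the frame is pretended to be shorter by this many bytes
    delta = PASS_ON_MY_FRAME - JIT_USE_WORDS + 1
    assert -delta * WORD == -48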
Example #4
    def test_reuse_scratch_register(self):
        base_addr = 0xFEDCBA9876543210
        cb = LocationCodeBuilder64()
        cb.begin_reuse_scratch_register()
        cb.MOV(ecx, heap(base_addr))
        cb.MOV(ecx, heap(base_addr + 8))
        cb.end_reuse_scratch_register()

        expected_instructions = (
                # mov r11, 0xFEDCBA9876543210
                '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
                # mov rcx, [r11]
                '\x49\x8B\x0B' +
                # mov rcx, [r11+8]
                '\x49\x8B\x4B\x08'
        )
        assert cb.getvalue() == expected_instructions
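A small plain-Python model of what begin_reuse_scratch_register() / end_reuse_scratch_register() buy: between the two calls, an address reachable by a small displacement from the value already loaded into r11 is addressed as [r11+ofs] instead of reloading the 64-bit constant. The class is an illustrative stand-in, not the real LocationCodeBuilder64:

    class ScratchReuseModel:
        def __init__(self):
            self.ops = []
            self.reusing = False
            self.cached_addr = None    # address known to be in r11, if any

        def begin_reuse_scratch_register(self):
            self.reusing = True

        def end_reuse_scratch_register(self):
            self.reusing = False
            self.cached_addr = None    # r11 can no longer be trusted

        def mov_from_heap(self, addr):
            if (self.cached_addr is not None and
                    0 <= addr - self.cached_addr < 2**31):
                ofs = addr - self.cached_addr   # reuse r11, add a displacement
            else:
                self.ops.append('MOV r11, 0x%X' % addr)
                self.cached_addr = addr if self.reusing else None
                ofs = 0
            self.ops.append('MOV rcx, [r11+%d]' % ofs)

    cb = ScratchReuseModel()
    cb.begin_reuse_scratch_register()
    cb.mov_from_heap(0xFEDCBA9876543210)
    cb.mov_from_heap(0xFEDCBA9876543210 + 8)
    cb.end_reuse_scratch_register()
    assert sum(op.startswith('MOV r11') for op in cb.ops) == 1  # loaded once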
Example #5
    def test_MOV_64bit_address_into_r11(self):
        base_addr = intmask(0xFEDCBA9876543210)
        cb = LocationCodeBuilder64()
        cb.MOV(r11, heap(base_addr))

        expected_instructions = (
            # mov r11, 0xFEDCBA9876543210
            '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
            # mov r11, [r11]
            '\x4D\x8B\x1B')
        assert cb.getvalue() == expected_instructions
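The trailing bytes '\x4D\x8B\x1B' show why r11 can serve as both destination and base: the REX.R and REX.B bits extend the reg and rm fields of the ModRM byte, and both select r11. A short decoder sketch of the architectural field layout (nothing backend-specific assumed):

    def decode_modrm(rex, modrm):
        mod = modrm >> 6                                  # 0 -> [base], no disp
        reg = ((rex >> 2) & 1) << 3 | ((modrm >> 3) & 7)  # REX.R extends reg
        rm = (rex & 1) << 3 | (modrm & 7)                 # REX.B extends rm
        return mod, reg, rm

    # '\x4D\x8B\x1B': 0x4D = REX.W|R|B, 0x8B = MOV r64, r/m64
    assert decode_modrm(0x4D, 0x1B) == (0, 11, 11)        # r11 <- [r11]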
Example #6
    def test_MOV_64bit_address_into_r11(self):
        base_addr = 0xFEDCBA9876543210
        cb = LocationCodeBuilder64()
        cb.MOV(r11, heap(base_addr))

        expected_instructions = (
                # mov r11, 0xFEDCBA9876543210
                '\x49\xBB\x10\x32\x54\x76\x98\xBA\xDC\xFE' +
                # mov r11, [r11]
                '\x4D\x8B\x1B'
        )
        assert cb.getvalue() == expected_instructions
Example #7
 def call_releasegil_addr_and_move_real_arguments(self, fastgil):
     from rpython.jit.backend.x86.assembler import heap
     assert self.is_call_release_gil
     assert not self.asm._is_asmgcc()
     #
     # Save this thread's shadowstack pointer into 'ebx',
     # for later comparison
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if gcrootmap:
         if gcrootmap.is_shadow_stack:
             rst = gcrootmap.get_root_stack_top_addr()
             self.mc.MOV(ebx, heap(rst))
     #
     # shadowstack: change 'rpy_fastgil' to 0 (it should be
     # non-zero right now).
     #self.change_extra_stack_depth = False
     #
     # <--here--> would come a memory fence, if the CPU needed one.
     self.mc.MOV(heap(fastgil), imm(0))
     #
     if not we_are_translated():  # for testing: we should not access
         self.mc.ADD(ebp, imm(1))  # ebp any more
Example #8
 def call_releasegil_addr_and_move_real_arguments(self, fastgil):
     from rpython.jit.backend.x86.assembler import heap
     #
     if not self.asm._is_asmgcc():
         # shadowstack: change 'rpy_fastgil' to 0 (it should be
         # non-zero right now).
         self.change_extra_stack_depth = False
         css_value = imm(0)
     else:
         from rpython.memory.gctransform import asmgcroot
         # build a 'css' structure on the stack: 2 words for the linkage,
         # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
         # total size of JIT_USE_WORDS.  This structure is found at
         # [ESP+css].
         css = -self.current_esp + (
             WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
         assert css >= 2 * WORD
         # Save ebp
         index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
         self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
         # Save the "return address": we pretend that it's css
         self.mc.LEA_rs(eax.value, css)           # LEA eax, [css]
         frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
         self.mc.MOV_sr(frame_ptr, eax.value)     # MOV [css.frame], eax
         # Set up jf_extra_stack_depth to pretend that the return address
         # was at css, and so our stack frame is supposedly shorter by
         # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
         delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
         self.change_extra_stack_depth = True
         self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
         css_value = eax
     #
     self.mc.MOV(heap(fastgil), css_value)
     #
     if not we_are_translated():        # for testing: we should not access
         self.mc.ADD(ebp, imm(1))       # ebp any more; and ignore 'fastgil'
Example #9
 def move_real_result_and_call_reacqgil_addr(self, fastgil):
     from rpython.jit.backend.x86 import rx86
     #
     # check if we need to call the reacqgil() function or not
     # (to acquire the GIL, remove the asmgcc head from
     # the chained list, etc.)
     mc = self.mc
     restore_edx = False
     if not self.asm._is_asmgcc():
         css = 0
         css_value = imm(0)
         old_value = ecx
     else:
         from rpython.memory.gctransform import asmgcroot
         css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
         if IS_X86_32:
             assert css >= 16
             if self.restype == 'L':  # long long result: eax/edx
                 if not self.result_value_saved_early:
                     mc.MOV_sr(12, edx.value)
                     restore_edx = True
             css_value = edx
             old_value = ecx
         elif IS_X86_64:
             css_value = edi
             old_value = esi
         mc.LEA_rs(css_value.value, css)
     #
     # Use XCHG as an atomic test-and-set-lock.  It also implicitly
     # does a memory barrier.
     mc.MOV(old_value, imm(1))
     if rx86.fits_in_32bits(fastgil):
         mc.XCHG_rj(old_value.value, fastgil)
     else:
         mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
         mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
     mc.CMP(old_value, css_value)
     #
     gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
     if bool(gcrootmap) and gcrootmap.is_shadow_stack:
         from rpython.jit.backend.x86.assembler import heap
         #
         # When doing a call_release_gil with shadowstack, there
         # is the risk that the 'rpy_fastgil' was free but the
         # current shadowstack may be that of a different
         # thread.  So here we check if the shadowstack pointer
         # is still the same as before we released the GIL (saved
         # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
         mc.J_il8(rx86.Conditions['NE'], 0)
         jne_location = mc.get_relative_pos()
         # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
         # state before the XCHG, but the XCHG acquired it by writing 1)
         rst = gcrootmap.get_root_stack_top_addr()
         mc = self.mc
         mc.CMP(ebx, heap(rst))
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
         # revert the rpy_fastgil acquired above, so that the
         # general 'reacqgil_addr' below can acquire it again...
         mc.MOV(heap(fastgil), ecx)
         # patch the JNE above
         offset = mc.get_relative_pos() - jne_location
         assert 0 < offset <= 127
         mc.overwrite(jne_location - 1, chr(offset))
     else:
         mc.J_il8(rx86.Conditions['E'], 0)
         je_location = mc.get_relative_pos()
     #
     # Yes, we need to call the reacqgil() function
     if not self.result_value_saved_early:
         self.save_result_value(save_edx=False)
     if self.asm._is_asmgcc():
         if IS_X86_32:
             mc.MOV_sr(4, old_value.value)
             mc.MOV_sr(0, css_value.value)
         # on X86_64, they are already in the right registers
     mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
     if not self.result_value_saved_early:
         self.restore_result_value(save_edx=False)
     #
     # patch the JE above
     offset = mc.get_relative_pos() - je_location
     assert 0 < offset <= 127
     mc.overwrite(je_location - 1, chr(offset))
     #
     if restore_edx:
         mc.MOV_rs(edx.value, 12)  # restore this
     #
     if self.result_value_saved_early:
         self.restore_result_value(save_edx=True)
     #
     if not we_are_translated():  # for testing: now we can access
         mc.SUB(ebp, imm(1))  # ebp again
     #
     # Now that we reacquired the GIL, we can reload a possibly modified ebp
     if self.asm._is_asmgcc():
         # special-case: reload ebp from the css
         from rpython.memory.gctransform import asmgcroot
         index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
         mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
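A minimal threading-based model of the XCHG fast path above, assuming only what the comments state: while released, rpy_fastgil reads as the value the releasing code stored (css, or 0 with shadowstack), and a thread reacquires it by atomically exchanging in 1 and comparing the old value. The Python lock merely stands in for the hardware atomicity of XCHG:

    import threading

    _atomic = threading.Lock()    # stands in for XCHG's implicit lock prefix
    fastgil = [0]                 # 0 == released (shadowstack case)

    def xchg(cell, new):
        with _atomic:
            old, cell[0] = cell[0], new
            return old

    def try_fast_reacquire(css_value=0):
        old_value = xchg(fastgil, 1)   # MOV old_value, 1; XCHG with fastgil
        return old_value == css_value  # CMP old_value, css_value; JNE -> slow

    assert try_fast_reacquire()        # was released: fast path taken
    assert not try_fast_reacquire()    # already held: must call reacqgil_addr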
Example #10
    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.x86 import rx86
        #
        # check if we need to call the reacqgil() function or not
        # (to acquire the GIL, remove the asmgcc head from
        # the chained list, etc.)
        mc = self.mc
        restore_edx = False
        if not self.asm._is_asmgcc():
            css = 0
            css_value = imm(0)
            old_value = ecx
        else:
            from rpython.memory.gctransform import asmgcroot
            css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
            if IS_X86_32:
                assert css >= 16
                if self.restype == 'L':  # long long result: eax/edx
                    if not self.result_value_saved_early:
                        mc.MOV_sr(12, edx.value)
                        restore_edx = True
                css_value = edx  # note: duplicated in ReacqGilSlowPath
                old_value = ecx  #
            elif IS_X86_64:
                css_value = edi
                old_value = esi
            mc.LEA_rs(css_value.value, css)
        #
        # Use XCHG as an atomic test-and-set-lock.  It also implicitly
        # does a memory barrier.
        mc.MOV(old_value, imm(1))
        if rx86.fits_in_32bits(fastgil):
            mc.XCHG_rj(old_value.value, fastgil)
        else:
            mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
            mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
        mc.CMP(old_value, css_value)
        #
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if bool(gcrootmap) and gcrootmap.is_shadow_stack:
            from rpython.jit.backend.x86.assembler import heap
            #
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack may be that of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
            mc.J_il(rx86.Conditions['NE'], 0xfffff)  # patched later
            early_jump_addr = mc.get_relative_pos(break_basic_block=False)
            # ^^^ this jump will go to almost the same place as the
            # ReacqGilSlowPath() computes, but one instruction farther,
            # i.e. just after the "MOV(heap(fastgil), ecx)".

            # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
            # state before the XCHG, but the XCHG acquired it by writing 1)
            rst = gcrootmap.get_root_stack_top_addr()
            mc = self.mc
            mc.CMP(ebx, heap(rst))
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
            sp.early_jump_addr = early_jump_addr
            sp.fastgil = fastgil
        else:
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.callbuilder = self
        sp.set_continue_addr(mc)
        self.asm.pending_slowpaths.append(sp)
        #
        if restore_edx:
            mc.MOV_rs(edx.value, 12)  # restore this
        #
        if self.result_value_saved_early:
            self.restore_result_value(save_edx=True)
        #
        if not we_are_translated():  # for testing: now we can access
            mc.SUB(ebp, imm(1))  # ebp again
        #
        # Now that we reacquired the GIL, we can reload a possibly modified ebp
        if self.asm._is_asmgcc():
            # special-case: reload ebp from the css
            from rpython.memory.gctransform import asmgcroot
            index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
            mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
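Unlike the earlier versions, this one does not patch the conditional jumps inline: it records ReacqGilSlowPath objects on pending_slowpaths, and each slow path later emits its body out of line and patches the jump that was left with a dummy target. An illustrative model of that pattern over a list of pseudo-ops (not the real slow-path class):

    class PendingSlowPathModel:
        def __init__(self, buf):
            buf.append(('Jcond', None))            # cond. jump, dummy target
            self.jump_index = len(buf) - 1

        def set_continue_addr(self, buf):
            self.continue_at = len(buf)            # where the main path resumes

        def generate_body(self, buf, body_ops):
            target = len(buf)                      # slow path starts out of line
            buf.extend(body_ops)
            buf.append(('JMP', self.continue_at))  # jump back to the main path
            cond, _ = buf[self.jump_index]
            buf[self.jump_index] = (cond, target)  # patch, like overwrite32()

    buf = []
    sp = PendingSlowPathModel(buf)      # Jcond placeholder at index 0
    sp.set_continue_addr(buf)           # main path resumes at index 1
    buf.append(('rest of main path',))
    sp.generate_body(buf, [('call reacqgil_addr',)])
    assert buf[sp.jump_index] == ('Jcond', 2)
    assert buf[-1] == ('JMP', 1)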
Example #11
    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        from rpython.jit.backend.x86 import rx86
        #
        # check if we need to call the reacqgil() function or not
        # (to acquire the GIL)
        mc = self.mc
        restore_edx = False
        #
        # Make sure we can use 'eax' in the sequel for CMPXCHG
        # On 32-bit, we also need to check if restype is 'L' for long long,
        # in which case we need to save eax and edx because they are both
        # used for the return value.
        if self.restype in (INT, 'L') and not self.result_value_saved_early:
            self.save_result_value(save_edx=self.restype == 'L')
            self.result_value_saved_early = True
        #
        # Use LOCK CMPXCHG as a compare-and-swap with memory barrier.
        tlsreg = self.get_tlofs_reg()
        thread_ident_ofs = lltls.get_thread_ident_offset(self.asm.cpu)
        #
        mc.MOV_rm(ecx.value, (tlsreg.value, thread_ident_ofs))
        mc.XOR_rr(eax.value, eax.value)

        if rx86.fits_in_32bits(fastgil):
            mc.LOCK()
            mc.CMPXCHG_jr(fastgil, ecx.value)
        else:
            mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
            mc.LOCK()
            mc.CMPXCHG_mr((X86_64_SCRATCH_REG.value, 0), ecx.value)
        #
        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if bool(gcrootmap):
            from rpython.jit.backend.x86.assembler import heap
            assert gcrootmap.is_shadow_stack
            #
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack may be that of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
            mc.J_il(rx86.Conditions['NZ'], 0xfffff)  # patched later
            early_jump_addr = mc.get_relative_pos(break_basic_block=False)
            # ^^^ this jump will go to almost the same place as the
            # ReacqGilSlowPath() computes, but one instruction further,
            # i.e. just after the "MOV(heap(fastgil), 0)".

            rst = gcrootmap.get_root_stack_top_addr()
            mc = self.mc
            mc.CMP(ebx, heap(rst))
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
            sp.early_jump_addr = early_jump_addr
            sp.fastgil = fastgil
        else:
            sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NZ'])
        sp.callbuilder = self
        sp.set_continue_addr(mc)
        self.asm.pending_slowpaths.append(sp)
        #
        if restore_edx:
            mc.MOV_rs(edx.value, 12)  # restore this
        #
        if self.result_value_saved_early:
            self.restore_result_value(save_edx=True)
        #
        if not we_are_translated():  # for testing: now we can access
            mc.SUB(ebp, imm(1))  # ebp again
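A plain-Python model of the LOCK CMPXCHG fast path above, assuming the protocol the code implies: rpy_fastgil holds 0 when free and the owner's thread ident when held, so reacquiring means atomically replacing 0 with our ident. The Python lock stands in for the atomicity of the LOCK prefix:

    import threading

    _atomic = threading.Lock()       # stands in for the LOCK prefix
    fastgil = [0]                    # 0 == released

    def cmpxchg(cell, expected, new):
        with _atomic:
            old = cell[0]
            if old == expected:
                cell[0] = new
            return old               # CMPXCHG leaves the old value in eax

    def try_fast_reacquire(thread_ident):
        # XOR eax, eax sets expected = 0; ecx holds our thread ident
        return cmpxchg(fastgil, 0, thread_ident) == 0   # NZ -> slow path

    assert try_fast_reacquire(0x1234)      # free: acquired on the fast path
    assert not try_fast_reacquire(0x5678)  # held: the NZ jump is taken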