def move_real_result_and_call_reacqgil_addr(self):
    # save the result we just got (in eax/eax+edx/st(0)/xmm0)
    self.save_result_value()
    # call the reopenstack() function (also reacquiring the GIL)
    if not self.asm._is_asmgcc():
        css = 0     # the helper takes no argument
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            reg = eax
        elif IS_X86_64:
            reg = edi
        self.mc.LEA_rs(reg.value, css)
        if IS_X86_32:
            self.mc.MOV_sr(0, reg.value)
    #
    self.mc.CALL(imm(self.asm.reacqgil_addr))
    #
    if not we_are_translated():        # for testing: now we can access
        self.mc.SUB(ebp, imm(1))       # ebp again
    #
    # Now that we reacquired the GIL, we can reload a possibly modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        self.mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def consider_vec_unpack_i(self, op):
    assert isinstance(op, VectorOp)
    index = op.getarg(1)
    count = op.getarg(2)
    assert isinstance(index, ConstInt)
    assert isinstance(count, ConstInt)
    args = op.getarglist()
    srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
    if op.is_vector():
        resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
        size = op.bytesize
    else:
        # unpack into iX box
        resloc = self.force_allocate_reg(op, args)
        arg = op.getarg(0)
        assert isinstance(arg, VectorOp)
        size = arg.bytesize
    residx = 0
    args = op.getarglist()
    arglocs = [resloc, srcloc, imm(residx), imm(index.value),
               imm(count.value), imm(size)]
    self.perform(op, arglocs, resloc)
def write_real_errno(self, save_err):
    """This occurs just before emit_raw_call().
    """
    mc = self.mc

    if handle_lasterror and (save_err & rffi.RFFI_READSAVED_LASTERROR):
        # must call SetLastError().  There are no registers to save
        # if we are on 32-bit in this case: no register contains
        # the arguments to the main function we want to call afterwards.
        # On win64, though, it's more messy.  It could be better optimized
        # but for now we save (again) the registers containing arguments,
        # and restore them afterwards.
        from rpython.rlib.rwin32 import _SetLastError
        adr = llmemory.cast_ptr_to_adr(_SetLastError)
        SetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
        #
        if save_err & rffi.RFFI_ALT_ERRNO:
            lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
        else:
            lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
        if not WIN64:
            self.save_stack_position()     # => edi, callee-saved
            mc.PUSH_m((tlofsreg.value, lasterror))
            mc.CALL(imm(follow_jump(SetLastError_addr)))
            # restore the stack position without assuming a particular
            # calling convention of _SetLastError()
            self.mc.stack_frame_size_delta(-WORD)
            self.mc.MOV(esp, self.saved_stack_position_reg)
        else:
            self.win64_save_register_args()
            mc.MOV_rm(ecx.value, (tlofsreg.value, lasterror))
            mc.CALL(imm(follow_jump(SetLastError_addr)))
            self.win64_restore_register_args()

    if save_err & rffi.RFFI_READSAVED_ERRNO:
        # Just before a call, read '*_errno' and write it into the
        # real 'errno'.  Most registers are free here, including the
        # callee-saved ones, except 'ebx' and except the ones used to
        # pass the arguments on x86-64.
        if save_err & rffi.RFFI_ALT_ERRNO:
            rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
        else:
            rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
        if IS_X86_32:
            tmpreg = edx
        else:
            tmpreg = r10    # edx is used for 3rd argument
        mc.MOV_rm(tmpreg.value, (tlofsreg.value, p_errno))
        mc.MOV32_rm(eax.value, (tlofsreg.value, rpy_errno))
        mc.MOV32_mr((tmpreg.value, 0), eax.value)
    elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
        # Same, but write zero.
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
        mc.MOV_rm(eax.value, (tlofsreg.value, p_errno))
        mc.MOV32_mi((eax.value, 0), 0)
def consider_vec_int_signext(self, op):
    assert isinstance(op, VectorOp)
    args = op.getarglist()
    resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    arg = op.getarg(0)
    assert isinstance(arg, VectorOp)
    size = arg.bytesize
    assert size > 0
    self.perform(op, [resloc, imm(size), imm(op.bytesize)], resloc)
def _consider_vec_getarrayitem(self, op):
    descr = op.getdescr()
    assert isinstance(descr, ArrayDescr)
    assert not descr.is_array_of_pointers() and \
           not descr.is_array_of_structs()
    itemsize, ofs, _ = unpack_arraydescr(descr)
    integer = not (descr.is_array_of_floats() or
                   descr.getconcrete_type() == FLOAT)
    aligned = False
    args = op.getarglist()
    base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
    ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
    result_loc = self.force_allocate_reg(op)
    self.perform(op, [base_loc, ofs_loc, imm(itemsize), imm(ofs),
                      imm(integer), imm(aligned)], result_loc)
def load_result(self):
    """Overridden in CallBuilder32 and CallBuilder64"""
    if self.ressize == 0:
        return      # void result
    # use the code in load_from_mem to do the zero- or sign-extension
    if self.restype == FLOAT:
        srcloc = xmm0
    else:
        srcloc = eax
    if self.ressize >= WORD and self.resloc is srcloc:
        return      # no need for any MOV
    if self.ressize == 1 and isinstance(srcloc, RegLoc):
        srcloc = srcloc.lowest8bits()
    self.asm.load_from_mem(self.resloc, srcloc,
                           imm(self.ressize), imm(self.ressign))
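# A pure-Python model of the zero- or sign-extension that load_from_mem
# performs on a narrow result (a sketch for illustration only: the JIT
# emits MOVZX/MOVSX for this, and 'extend' is a hypothetical helper,
# not part of the backend).
def extend(value, ressize, signed):
    bits = ressize * 8
    value &= (1 << bits) - 1                   # keep only 'ressize' bytes
    if signed and value >= (1 << (bits - 1)):
        value -= 1 << bits                     # sign-extend the top bit
    return value

assert extend(0xFF, 1, signed=True) == -1      # e.g. a C 'signed char'
assert extend(0xFF, 1, signed=False) == 255    # e.g. a C 'unsigned char'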
def consider_vec_pack_i(self, op):
    # new_res = vec_pack_i(res, src, index, count)
    assert isinstance(op, VectorOp)
    arg = op.getarg(1)
    index = op.getarg(2)
    count = op.getarg(3)
    assert isinstance(index, ConstInt)
    assert isinstance(count, ConstInt)
    args = op.getarglist()
    srcloc = self.make_sure_var_in_reg(arg, args)
    resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    residx = index.value    # where to put it in result?
    srcidx = 0
    arglocs = [resloc, srcloc, imm(residx), imm(srcidx),
               imm(count.value), imm(op.bytesize)]
    self.perform(op, arglocs, resloc)
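# What vec_pack_i computes, modeled on plain Python lists (illustration
# only; 'vec_pack' is a hypothetical helper): copy 'count' lanes of
# 'src', starting at srcidx=0, into a copy of 'res' at lane 'residx'.
def vec_pack(res, src, residx, srcidx, count):
    new_res = list(res)
    new_res[residx:residx + count] = src[srcidx:srcidx + count]
    return new_res

assert vec_pack([0, 1, 2, 3], [9, 8], 2, 0, 2) == [0, 1, 9, 8]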
def call_releasegil_addr_and_move_real_arguments(self):
    initial_esp = self.current_esp
    self.save_register_arguments()
    #
    if not self.asm._is_asmgcc():
        # the helper takes no argument
        self.change_extra_stack_depth = False
    else:
        from rpython.memory.gctransform import asmgcroot
        # build a 'css' structure on the stack: 2 words for the linkage,
        # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
        # total size of JIT_USE_WORDS.  This structure is found at
        # [ESP+css].
        css = -self.current_esp + (
            WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
        assert css >= 2 * WORD
        # Save ebp
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
        # Save the "return address": we pretend that it's css
        if IS_X86_32:
            reg = eax
        elif IS_X86_64:
            reg = edi
        self.mc.LEA_rs(reg.value, css)           # LEA reg, [css]
        frame_ptr = css + WORD * (2 + asmgcroot.FRAME_PTR)
        self.mc.MOV_sr(frame_ptr, reg.value)     # MOV [css.frame], reg
        # Set up jf_extra_stack_depth to pretend that the return address
        # was at css, and so our stack frame is supposedly shorter by
        # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
        delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
        self.change_extra_stack_depth = True
        self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
        # Call the closestack() function (also releasing the GIL)
        # with 'reg' as argument
        if IS_X86_32:
            self.subtract_esp_aligned(1)
            self.mc.MOV_sr(0, reg.value)
        #else:
        #   on x86_64, reg is edi so that it is already correct
    #
    self.mc.CALL(imm(self.asm.releasegil_addr))
    #
    if not we_are_translated():        # for testing: we should not access
        self.mc.ADD(ebp, imm(1))       # ebp any more
    #
    self.restore_register_arguments()
    self.restore_stack_pointer(initial_esp)
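# Worked example of the 'css' offset computation above.  The constants
# here are assumed, illustrative values (PASS_ON_MY_FRAME and
# JIT_USE_WORDS really come from the backend and asmgcroot); only the
# arithmetic is the point.
WORD = 8                     # x86-64
PASS_ON_MY_FRAME = 12        # assumed: words reserved in our frame
JIT_USE_WORDS = 7            # assumed: 2 linkage + 5 ASM_FRAMEDATA words
current_esp = -32            # bytes pushed below the frame so far

# the css structure occupies the top JIT_USE_WORDS words of the
# PASS_ON_MY_FRAME area; 'css' addresses it relative to the current ESP
css = -current_esp + WORD * (PASS_ON_MY_FRAME - JIT_USE_WORDS)
assert css == 72             # the structure would start at [ESP+72]
assert css >= 2 * WORD       # the check made by the real code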
def consider_vec_logic(self, op):
    lhs = op.getarg(0)
    assert isinstance(lhs, VectorOp)
    args = op.getarglist()
    source = self.make_sure_var_in_reg(op.getarg(1), args)
    result = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    self.perform(op, [source, imm(lhs.bytesize)], result)
def consider_vec_int_is_true(self, op):
    args = op.getarglist()
    arg = op.getarg(0)
    assert isinstance(arg, VectorOp)
    argloc = self.loc(arg)
    resloc = self.xrm.force_result_in_reg(op, arg, args)
    self.perform(op, [resloc, imm(arg.bytesize)], None)
def consider_vec_arith(self, op):
    lhs = op.getarg(0)
    size = lhs.bytesize
    args = op.getarglist()
    loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
    loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    self.perform(op, [loc0, loc1, imm(size)], loc0)
def generate_body(self, assembler, mc):
    if self.early_jump_addr != 0:
        # This slow-path has two entry points, with two
        # conditional jumps.  We can jump to the regular start
        # of this slow-path with the 2nd conditional jump.  Or,
        # we can jump past the "MOV(heap(fastgil), ecx)"
        # instruction from the 1st conditional jump.
        # This instruction reverts the rpy_fastgil acquired
        # previously, so that the general 'reacqgil_addr'
        # function can acquire it again.  It must only be done
        # if we actually succeeded in acquiring rpy_fastgil.
        from rpython.jit.backend.x86.assembler import heap
        mc.MOV(heap(self.fastgil), ecx)
        offset = mc.get_relative_pos() - self.early_jump_addr
        mc.overwrite32(self.early_jump_addr - 4, offset)
        # scratch register forgotten here, by get_relative_pos()

    # call the reacqgil() function
    cb = self.callbuilder
    if not cb.result_value_saved_early:
        cb.save_result_value(save_edx=False)
    if assembler._is_asmgcc():
        if IS_X86_32:
            css_value = edx
            old_value = ecx
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(follow_jump(assembler.reacqgil_addr)))
    if not cb.result_value_saved_early:
        cb.restore_result_value(save_edx=False)
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    from rpython.jit.backend.x86.assembler import heap
    assert self.is_call_release_gil
    #
    # Save this thread's shadowstack pointer into 'ebx',
    # for later comparison
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        if gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.MOV(ebx, heap(rst))
    #
    if not self.asm._is_asmgcc():
        # shadowstack: change 'rpy_fastgil' to 0 (it should be
        # non-zero right now).
        self.change_extra_stack_depth = False
        # ^^ note that set_extra_stack_depth() in this case is a no-op
        css_value = imm(0)
    else:
        from rpython.memory.gctransform import asmgcroot
        # build a 'css' structure on the stack: 2 words for the linkage,
        # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
        # total size of JIT_USE_WORDS.  This structure is found at
        # [ESP+css].
        css = -self.get_current_esp() + (
            WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
        assert css >= 2 * WORD
        # Save ebp
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
        # Save the "return address": we pretend that it's css
        self.mc.LEA_rs(eax.value, css)           # LEA eax, [css]
        frame_ptr = css + WORD * (2 + asmgcroot.FRAME_PTR)
        self.mc.MOV_sr(frame_ptr, eax.value)     # MOV [css.frame], eax
        # Set up jf_extra_stack_depth to pretend that the return address
        # was at css, and so our stack frame is supposedly shorter by
        # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
        delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
        self.change_extra_stack_depth = True
        self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
        css_value = eax
    #
    # <--here--> would come a memory fence, if the CPU needed one.
    self.mc.MOV(heap(fastgil), css_value)
    #
    if not we_are_translated():        # for testing: we should not access
        self.mc.ADD(ebp, imm(1))       # ebp any more
def consider_vec_float_eq(self, op):
    assert isinstance(op, VectorOp)
    lhs = op.getarg(0)
    assert isinstance(lhs, VectorOp)
    args = op.getarglist()
    rhsloc = self.make_sure_var_in_reg(op.getarg(1), args)
    lhsloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    self.perform(op, [lhsloc, rhsloc, imm(lhs.bytesize)], lhsloc)
def consider_vec_arith(self, op):
    lhs = op.getarg(0)
    assert isinstance(op, VectorOp)
    size = op.bytesize
    args = op.getarglist()
    loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
    loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    self.perform(op, [loc0, loc1, imm(size)], loc0)
def consider_vec_store(self, op):
    # TODO
    descr = op.getdescr()
    assert isinstance(descr, ArrayDescr)
    assert not descr.is_array_of_pointers() and \
           not descr.is_array_of_structs()
    itemsize, _, _ = unpack_arraydescr(descr)
    args = op.getarglist()
    base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
    value_loc = self.make_sure_var_in_reg(op.getarg(2), args)
    ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
    scale = get_scale(op.getarg(3).getint())
    ofs = op.getarg(4).getint()
    integer = not (descr.is_array_of_floats() or
                   descr.getconcrete_type() == FLOAT)
    self.perform_discard(op, [base_loc, ofs_loc, value_loc,
                              imm(itemsize), imm(scale), imm(ofs),
                              imm(integer)])
def consider_vec_expand_i(self, op):
    arg = op.getarg(0)
    args = op.getarglist()
    if arg.is_constant():
        srcloc = self.rm.convert_to_imm(arg)
    else:
        srcloc = self.make_sure_var_in_reg(arg, args)
    resloc = self.xrm.force_allocate_reg(op, args)
    self.perform(op, [srcloc, imm(op.bytesize)], resloc)
def consider_vec_expand_i(self, op):
    assert isinstance(op, VectorOp)
    arg = op.getarg(0)
    args = op.getarglist()
    if arg.is_constant():
        srcloc = self.rm.convert_to_imm(arg)
    else:
        srcloc = self.make_sure_var_in_reg(arg, args)
    resloc = self.xrm.force_allocate_reg(op, args)
    self.perform(op, [srcloc, imm(op.bytesize)], resloc)
def consider_vec_unpack_i(self, op):
    index = op.getarg(1)
    count = op.getarg(2)
    assert isinstance(index, ConstInt)
    assert isinstance(count, ConstInt)
    args = op.getarglist()
    srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
    if op.is_vector():
        resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
        size = op.bytesize
    else:
        # unpack into iX box
        resloc = self.force_allocate_reg(op, args)
        arg = op.getarg(0)
        size = arg.bytesize
    residx = 0
    args = op.getarglist()
    arglocs = [resloc, srcloc, imm(residx), imm(index.value),
               imm(count.value), imm(size)]
    self.perform(op, arglocs, resloc)
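# vec_unpack_i is the converse of vec_pack_i above, modeled on plain
# Python lists (illustration only; 'vec_unpack' is a hypothetical
# helper): read 'count' lanes starting at 'index'.  A single lane
# unpacks into a scalar (iX) box, several lanes into a smaller vector.
def vec_unpack(src, index, count):
    if count == 1:
        return src[index]                  # unpack into an iX box
    return src[index:index + count]        # unpack into a shorter vector

assert vec_unpack([0, 1, 9, 8], 2, 1) == 9
assert vec_unpack([0, 1, 9, 8], 2, 2) == [9, 8]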
def consider_vec_expand_f(self, op):
    arg = op.getarg(0)
    args = op.getarglist()
    if arg.is_constant():
        resloc = self.xrm.force_allocate_reg(op)
        srcloc = self.xrm.expand_float(op.bytesize, arg)
    else:
        resloc = self.xrm.force_result_in_reg(op, arg, args)
        srcloc = resloc
    self.perform(op, [srcloc, imm(op.bytesize)], resloc)
def consider_vec_float_eq(self, op):
    assert isinstance(op, VectorOp)
    lhs = op.getarg(0)
    assert isinstance(lhs, VectorOp)
    args = op.getarglist()
    # we need to use xmm0
    lhsloc = self.enforce_var_in_vector_reg(op.getarg(0), args,
                                            selected_reg=xmm0)
    rhsloc = self.make_sure_var_in_reg(op.getarg(1), args)
    resloc = self.force_allocate_vector_reg_or_cc(op)
    self.perform(op, [lhsloc, rhsloc, imm(lhs.bytesize)], resloc)
def consider_vec_expand_f(self, op):
    assert isinstance(op, VectorOp)
    arg = op.getarg(0)
    args = op.getarglist()
    if arg.is_constant():
        resloc = self.xrm.force_allocate_reg(op)
        srcloc = self.xrm.expand_float(op.bytesize, arg)
    else:
        resloc = self.xrm.force_result_in_reg(op, arg, args)
        srcloc = resloc
    self.perform(op, [srcloc, imm(op.bytesize)], resloc)
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    from rpython.jit.backend.x86.assembler import heap
    assert self.is_call_release_gil
    assert not self.asm._is_asmgcc()
    #
    # Save this thread's shadowstack pointer into 'ebx',
    # for later comparison
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if gcrootmap:
        if gcrootmap.is_shadow_stack:
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.MOV(ebx, heap(rst))
    #
    # shadowstack: change 'rpy_fastgil' to 0 (it should be
    # non-zero right now).
    #self.change_extra_stack_depth = False
    #
    # <--here--> would come a memory fence, if the CPU needed one.
    self.mc.MOV(heap(fastgil), imm(0))
    #
    if not we_are_translated():        # for testing: we should not access
        self.mc.ADD(ebp, imm(1))       # ebp any more
def write_real_errno(self, save_err):
    """This occurs just before emit_raw_call().
    """
    mc = self.mc

    if handle_lasterror and (save_err & rffi.RFFI_READSAVED_LASTERROR):
        # must call SetLastError().  There are no registers to save
        # because we are on 32-bit in this case: no register contains
        # the arguments to the main function we want to call afterwards.
        from rpython.rlib.rwin32 import _SetLastError
        adr = llmemory.cast_ptr_to_adr(_SetLastError)
        SetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
        assert isinstance(self, CallBuilder32)    # Windows 32-bit only
        #
        if save_err & rffi.RFFI_ALT_ERRNO:
            lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
        else:
            lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi, callee-saved
        self.save_stack_position()         # => edi, callee-saved
        mc.PUSH_m((tlofsreg.value, lasterror))
        mc.CALL(imm(follow_jump(SetLastError_addr)))
        # restore the stack position without assuming a particular
        # calling convention of _SetLastError()
        self.mc.stack_frame_size_delta(-WORD)
        self.mc.MOV(esp, self.saved_stack_position_reg)

    if save_err & rffi.RFFI_READSAVED_ERRNO:
        # Just before a call, read '*_errno' and write it into the
        # real 'errno'.  Most registers are free here, including the
        # callee-saved ones, except 'ebx' and except the ones used to
        # pass the arguments on x86-64.
        if save_err & rffi.RFFI_ALT_ERRNO:
            rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
        else:
            rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
        if IS_X86_32:
            tmpreg = edx
        else:
            tmpreg = r11    # edx is used for 3rd argument
        mc.MOV_rm(tmpreg.value, (tlofsreg.value, p_errno))
        mc.MOV32_rm(eax.value, (tlofsreg.value, rpy_errno))
        mc.MOV32_mr((tmpreg.value, 0), eax.value)
    elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
        # Same, but write zero.
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
        mc.MOV_rm(eax.value, (tlofsreg.value, p_errno))
        mc.MOV32_mi((eax.value, 0), 0)
def __init__(self, assembler, fnloc, arglocs,
             resloc=eax, restype=INT, ressize=WORD):
    AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                 resloc, restype, ressize)
    # Avoid tons of issues with a non-immediate fnloc by sticking it
    # as an extra argument if needed
    if isinstance(fnloc, ImmedLoc):
        self.fnloc_is_immediate = True
        self.fnloc = imm(follow_jump(fnloc.value))
    else:
        self.fnloc_is_immediate = False
        self.fnloc = None
        self.arglocs = arglocs + [fnloc]
    self.start_frame_size = self.mc._frame_size
def __init__(self, assembler, fnloc, arglocs,
             resloc=eax, restype=INT, ressize=WORD):
    AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
                                 resloc, restype, ressize)
    # Avoid tons of issues with a non-immediate fnloc by sticking it
    # as an extra argument if needed
    if isinstance(fnloc, ImmedLoc):
        self.fnloc_is_immediate = True
        self.fnloc = imm(follow_jump(fnloc.value))
    else:
        self.fnloc_is_immediate = False
        self.fnloc = None
        self.arglocs = arglocs + [fnloc]
    self.current_esp = 0     # 0 or (usually) negative, counted in bytes
def write_real_errno(self, save_err):
    """This occurs just before emit_raw_call().
    """
    mc = self.mc

    if handle_lasterror and (save_err & rffi.RFFI_READSAVED_LASTERROR):
        # must call SetLastError().  There are no registers to save
        # because we are on 32-bit in this case: no register contains
        # the arguments to the main function we want to call afterwards.
        from rpython.rlib.rwin32 import _SetLastError
        adr = llmemory.cast_ptr_to_adr(_SetLastError)
        SetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
        assert isinstance(self, CallBuilder32)    # Windows 32-bit only
        #
        if save_err & rffi.RFFI_ALT_ERRNO:
            lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
        else:
            lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi, callee-saved
        self.save_stack_position()         # => edi, callee-saved
        mc.PUSH_m((tlofsreg.value, lasterror))
        mc.CALL(imm(follow_jump(SetLastError_addr)))
        # restore the stack position without assuming a particular
        # calling convention of _SetLastError()
        self.mc.MOV(esp, self.saved_stack_position_reg)

    if save_err & rffi.RFFI_READSAVED_ERRNO:
        # Just before a call, read '*_errno' and write it into the
        # real 'errno'.  Most registers are free here, including the
        # callee-saved ones, except 'ebx' and except the ones used to
        # pass the arguments on x86-64.
        if save_err & rffi.RFFI_ALT_ERRNO:
            rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
        else:
            rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
        if IS_X86_32:
            tmpreg = edx
        else:
            tmpreg = r11    # edx is used for 3rd argument
        mc.MOV_rm(tmpreg.value, (tlofsreg.value, p_errno))
        mc.MOV32_rm(eax.value, (tlofsreg.value, rpy_errno))
        mc.MOV32_mr((tmpreg.value, 0), eax.value)
    elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
        # Same, but write zero.
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()    # => esi or r12, callee-saved
        mc.MOV_rm(eax.value, (tlofsreg.value, p_errno))
        mc.MOV32_mi((eax.value, 0), 0)
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
    from rpython.jit.backend.x86.assembler import heap
    #
    if not self.asm._is_asmgcc():
        # shadowstack: change 'rpy_fastgil' to 0 (it should be
        # non-zero right now).
        self.change_extra_stack_depth = False
        css_value = imm(0)
    else:
        from rpython.memory.gctransform import asmgcroot
        # build a 'css' structure on the stack: 2 words for the linkage,
        # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
        # total size of JIT_USE_WORDS.  This structure is found at
        # [ESP+css].
        css = -self.current_esp + (
            WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
        assert css >= 2 * WORD
        # Save ebp
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
        # Save the "return address": we pretend that it's css
        self.mc.LEA_rs(eax.value, css)           # LEA eax, [css]
        frame_ptr = css + WORD * (2 + asmgcroot.FRAME_PTR)
        self.mc.MOV_sr(frame_ptr, eax.value)     # MOV [css.frame], eax
        # Set up jf_extra_stack_depth to pretend that the return address
        # was at css, and so our stack frame is supposedly shorter by
        # (PASS_ON_MY_FRAME-JIT_USE_WORDS+1) words
        delta = PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS + 1
        self.change_extra_stack_depth = True
        self.asm.set_extra_stack_depth(self.mc, -delta * WORD)
        css_value = eax
    #
    self.mc.MOV(heap(fastgil), css_value)
    #
    if not we_are_translated():        # for testing: we should not access
        self.mc.ADD(ebp, imm(1))       # ebp any more; and ignore 'fastgil'
def read_real_errno(self, save_err):
    """This occurs after emit_raw_call() and after restore_stack_pointer().
    """
    mc = self.mc

    if save_err & rffi.RFFI_SAVE_ERRNO:
        # Just after a call, read the real 'errno' and save a copy of
        # it inside our thread-local '*_errno'.  Most registers are
        # free here, including the callee-saved ones, except 'ebx'.
        # The tlofs register might have been loaded earlier and is
        # callee-saved, so it does not need to be reloaded.
        if save_err & rffi.RFFI_ALT_ERRNO:
            rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
        else:
            rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()   # => esi or r12 (possibly reused)
        mc.MOV_rm(edi.value, (tlofsreg.value, p_errno))
        mc.MOV32_rm(edi.value, (edi.value, 0))
        mc.MOV32_mr((tlofsreg.value, rpy_errno), edi.value)

    if handle_lasterror and (
            save_err & (rffi.RFFI_SAVE_LASTERROR |
                        rffi.RFFI_SAVE_WSALASTERROR)):
        if save_err & rffi.RFFI_SAVE_LASTERROR:
            from rpython.rlib.rwin32 import _GetLastError
            adr = llmemory.cast_ptr_to_adr(_GetLastError)
        else:
            from rpython.rlib._rsocket_rffi import _WSAGetLastError
            adr = llmemory.cast_ptr_to_adr(_WSAGetLastError)
        GetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
        assert isinstance(self, CallBuilder32)    # Windows 32-bit only
        #
        if save_err & rffi.RFFI_ALT_ERRNO:
            lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
        else:
            lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
        self.save_result_value(save_edx=True)   # save eax/edx/xmm0
        self.result_value_saved_early = True
        mc.CALL(imm(follow_jump(GetLastError_addr)))
        #
        tlofsreg = self.get_tlofs_reg()   # => esi (possibly reused)
        mc.MOV32_mr((tlofsreg.value, lasterror), eax.value)
def read_real_errno(self, save_err):
    """This occurs after emit_raw_call() and after restore_stack_pointer().
    """
    mc = self.mc

    if save_err & rffi.RFFI_SAVE_ERRNO:
        # Just after a call, read the real 'errno' and save a copy of
        # it inside our thread-local '*_errno'.  Most registers are
        # free here, including the callee-saved ones, except 'ebx'.
        # The tlofs register might have been loaded earlier and is
        # callee-saved, so it does not need to be reloaded.
        if save_err & rffi.RFFI_ALT_ERRNO:
            rpy_errno = llerrno.get_alt_errno_offset(self.asm.cpu)
        else:
            rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
        p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
        tlofsreg = self.get_tlofs_reg()   # => esi or r12 (possibly reused)
        mc.MOV_rm(ecx.value, (tlofsreg.value, p_errno))
        mc.MOV32_rm(ecx.value, (ecx.value, 0))
        mc.MOV32_mr((tlofsreg.value, rpy_errno), ecx.value)

    if handle_lasterror and (save_err & (rffi.RFFI_SAVE_LASTERROR |
                                         rffi.RFFI_SAVE_WSALASTERROR)):
        if save_err & rffi.RFFI_SAVE_LASTERROR:
            from rpython.rlib.rwin32 import _GetLastError
            adr = llmemory.cast_ptr_to_adr(_GetLastError)
        else:
            from rpython.rlib._rsocket_rffi import _WSAGetLastError
            adr = llmemory.cast_ptr_to_adr(_WSAGetLastError)
        GetLastError_addr = self.asm.cpu.cast_adr_to_int(adr)
        assert isinstance(self, CallBuilder32)    # Windows 32-bit only
        #
        if save_err & rffi.RFFI_ALT_ERRNO:
            lasterror = llerrno.get_alt_lasterror_offset(self.asm.cpu)
        else:
            lasterror = llerrno.get_rpy_lasterror_offset(self.asm.cpu)
        self.save_result_value(save_edx=True)   # save eax/edx/xmm0
        self.result_value_saved_early = True
        mc.CALL(imm(follow_jump(GetLastError_addr)))
        #
        tlofsreg = self.get_tlofs_reg()   # => esi (possibly reused)
        mc.MOV32_mr((tlofsreg.value, lasterror), eax.value)
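# A rough ctypes analogue of the errno handling in write_real_errno()
# and read_real_errno(), to make the intent concrete: just before the
# call, the thread-local saved value is written into the real 'errno';
# just after, the real 'errno' is copied back.  Purely illustrative:
# 'tls.rpy_errno' is a hypothetical stand-in for the JIT's thread-local
# slot, and a POSIX libc is assumed.
import ctypes
import threading

libc = ctypes.CDLL(None, use_errno=True)
tls = threading.local()

def call_with_mirrored_errno(fn, *args):
    ctypes.set_errno(getattr(tls, 'rpy_errno', 0))   # write_real_errno
    result = fn(*args)
    tls.rpy_errno = ctypes.get_errno()               # read_real_errno
    return result

assert call_with_mirrored_errno(libc.close, -1) == -1
assert tls.rpy_errno == 9                            # EBADF on Linux/macOS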
def consider_vec_arith_unary(self, op):
    lhs = op.getarg(0)
    assert isinstance(lhs, VectorOp)
    args = op.getarglist()
    res = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    self.perform(op, [res, imm(lhs.bytesize)], res)
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                if not self.result_value_saved_early:
                    mc.MOV_sr(12, edx.value)
                    restore_edx = True
            css_value = edx
            old_value = ecx
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    # Use XCHG as an atomic test-and-set-lock.  It also implicitly
    # does a memory barrier.
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap) and gcrootmap.is_shadow_stack:
        from rpython.jit.backend.x86.assembler import heap
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il8(rx86.Conditions['NE'], 0)
        jne_location = mc.get_relative_pos()
        # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
        # state before the XCHG, but the XCHG acquired it by writing 1)
        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        mc.J_il8(rx86.Conditions['E'], 0)
        je_location = mc.get_relative_pos()
        # revert the rpy_fastgil acquired above, so that the
        # general 'reacqgil_addr' below can acquire it again...
        mc.MOV(heap(fastgil), ecx)
        # patch the JNE above
        offset = mc.get_relative_pos() - jne_location
        assert 0 < offset <= 127
        mc.overwrite(jne_location - 1, chr(offset))
    else:
        mc.J_il8(rx86.Conditions['E'], 0)
        je_location = mc.get_relative_pos()
    #
    # Yes, we need to call the reacqgil() function
    if not self.result_value_saved_early:
        self.save_result_value(save_edx=False)
    if self.asm._is_asmgcc():
        if IS_X86_32:
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
    if not self.result_value_saved_early:
        self.restore_result_value(save_edx=False)
    #
    # patch the JE above
    offset = mc.get_relative_pos() - je_location
    assert 0 < offset <= 127
    mc.overwrite(je_location - 1, chr(offset))
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():    # for testing: now we can access
        mc.SUB(ebp, imm(1))        # ebp again
    #
    # Now that we reacquired the GIL, we can reload a possibly modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
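# Sketch of the forward-jump patching idiom used above (emit J_il8 with
# a zero displacement, remember get_relative_pos(), patch once the
# target is known).  A bytearray stands in for the machine-code buffer;
# 0x74 is the real x86 opcode for JE rel8, everything else is
# simplified for illustration.
code = bytearray()

code.append(0x74)                  # JE rel8 ...
code.append(0x00)                  # ... with a placeholder displacement
je_location = len(code)            # position just after the jump

code.extend([0x90, 0x90, 0x90])    # the code to be skipped (NOPs here)

# rel8 is relative to the end of the JE instruction, i.e. je_location
offset = len(code) - je_location
assert 0 < offset <= 127           # must fit in a signed byte
code[je_location - 1] = offset     # patch the displacement byte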
def consider_vec_int_signext(self, op):
    args = op.getarglist()
    resloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
    size = op.cast_from_bytesize()
    assert size > 0
    self.perform(op, [resloc, imm(size), imm(op.bytesize)], resloc)
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                mc.MOV_sr(12, edx.value)
                restore_edx = True
            css_value = edx
            old_value = ecx
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    mc.J_il8(rx86.Conditions['E'], 0)
    je_location = mc.get_relative_pos()
    #
    # Yes, we need to call the reacqgil() function
    self.save_result_value_reacq()
    if self.asm._is_asmgcc():
        if IS_X86_32:
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(self.asm.reacqgil_addr))
    self.restore_result_value_reacq()
    #
    # patch the JE above
    offset = mc.get_relative_pos() - je_location
    assert 0 < offset <= 127
    mc.overwrite(je_location - 1, chr(offset))
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if not we_are_translated():    # for testing: now we can access
        mc.SUB(ebp, imm(1))        # ebp again
    #
    # Now that we reacquired the GIL, we can reload a possibly modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                if not self.result_value_saved_early:
                    mc.MOV_sr(12, edx.value)
                    restore_edx = True
            css_value = edx    # note: duplicated in ReacqGilSlowPath
            old_value = ecx    #
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    # Use XCHG as an atomic test-and-set-lock.  It also implicitly
    # does a memory barrier.
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap) and gcrootmap.is_shadow_stack:
        from rpython.jit.backend.x86.assembler import heap
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il(rx86.Conditions['NE'], 0xfffff)    # patched later
        early_jump_addr = mc.get_relative_pos(break_basic_block=False)
        # ^^^ this jump will go to almost the same place as the
        # ReacqGilSlowPath() computes, but one instruction farther,
        # i.e. just after the "MOV(heap(fastgil), ecx)".

        # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
        # state before the XCHG, but the XCHG acquired it by writing 1)
        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.early_jump_addr = early_jump_addr
        sp.fastgil = fastgil
    else:
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
    sp.callbuilder = self
    sp.set_continue_addr(mc)
    self.asm.pending_slowpaths.append(sp)
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():    # for testing: now we can access
        mc.SUB(ebp, imm(1))        # ebp again
    #
    # Now that we reacquired the GIL, we can reload a possibly modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL)
    mc = self.mc
    restore_edx = False
    #
    # Make sure we can use 'eax' in the sequel for CMPXCHG
    # On 32-bit, we also need to check if restype is 'L' for long long,
    # in which case we need to save eax and edx because they are both
    # used for the return value.
    if self.restype in (INT, 'L') and not self.result_value_saved_early:
        self.save_result_value(save_edx=self.restype == 'L')
        self.result_value_saved_early = True
    #
    # Use LOCK CMPXCHG as a compare-and-swap with memory barrier.
    tlsreg = self.get_tlofs_reg()
    thread_ident_ofs = lltls.get_thread_ident_offset(self.asm.cpu)
    #
    mc.MOV_rm(ecx.value, (tlsreg.value, thread_ident_ofs))
    mc.XOR_rr(eax.value, eax.value)
    if rx86.fits_in_32bits(fastgil):
        mc.LOCK()
        mc.CMPXCHG_jr(fastgil, ecx.value)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.LOCK()
        mc.CMPXCHG_mr((X86_64_SCRATCH_REG.value, 0), ecx.value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap):
        from rpython.jit.backend.x86.assembler import heap
        assert gcrootmap.is_shadow_stack
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il(rx86.Conditions['NZ'], 0xfffff)    # patched later
        early_jump_addr = mc.get_relative_pos(break_basic_block=False)
        # ^^^ this jump will go to almost the same place as the
        # ReacqGilSlowPath() computes, but one instruction further,
        # i.e. just after the "MOV(heap(fastgil), 0)".

        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.early_jump_addr = early_jump_addr
        sp.fastgil = fastgil
    else:
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NZ'])
    sp.callbuilder = self
    sp.set_continue_addr(mc)
    self.asm.pending_slowpaths.append(sp)
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():    # for testing: now we can access
        mc.SUB(ebp, imm(1))        # ebp again
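# A minimal pure-Python model of the 'rpy_fastgil' handoff implemented
# above with LOCK CMPXCHG, as a sketch only: 'fastgil', 'thread_ident'
# and 'reacqgil_slow_path' are hypothetical stand-ins, and an ordinary
# lock emulates the atomicity that the LOCK prefix provides in hardware.
import threading

_atomic = threading.Lock()           # emulates the LOCK prefix
fastgil = [0]                        # 0 = released, else owner's ident

def compare_and_swap(cell, expected, new):
    # what LOCK CMPXCHG does atomically: store 'new' only if the cell
    # still holds 'expected'; always return the old value
    with _atomic:
        old = cell[0]
        if old == expected:
            cell[0] = new
        return old

def release_gil():
    fastgil[0] = 0                   # MOV [fastgil], 0

def reacquire_gil(thread_ident, reacqgil_slow_path):
    # fast path: if the GIL is free, grab it with a single CAS (the
    # Z flag tested after CMPXCHG above); otherwise take the slow path
    if compare_and_swap(fastgil, 0, thread_ident) != 0:
        reacqgil_slow_path()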