def INSN(self, loc1, loc2):
    code1 = loc1.location_code()
    code2 = loc2.location_code()

    # You can pass in the scratch register as a location, but you
    # must be careful not to combine it with location types that
    # might need to use the scratch register themselves.
    if loc2 is X86_64_SCRATCH_REG:
        if code1 == 'j':
            assert (name.startswith("MOV") and
                    rx86.fits_in_32bits(loc1.value_j()))
    if loc1 is X86_64_SCRATCH_REG and not name.startswith("MOV"):
        assert code2 not in ('j', 'i')

    for possible_code2 in unrolling_location_codes:
        if not has_implementation_for('?', possible_code2):
            continue
        if code2 == possible_code2:
            val2 = getattr(loc2, "value_" + possible_code2)()
            #
            # Fake out certain operations for x86_64
            if self.WORD == 8 and possible_code2 == 'i' and not rx86.fits_in_32bits(val2):
                insn_with_64_bit_immediate(self, loc1, loc2)
                return
            #
            # Regular case
            for possible_code1 in unrolling_location_codes:
                if not has_implementation_for(possible_code1,
                                              possible_code2):
                    continue
                if code1 == possible_code1:
                    val1 = getattr(loc1, "value_" + possible_code1)()
                    # More faking out of certain operations for x86_64
                    fits32 = rx86.fits_in_32bits
                    if possible_code1 == 'j' and not fits32(val1):
                        val1 = self._addr_as_reg_offset(val1)
                        invoke(self, "m" + possible_code2, val1, val2)
                        return
                    if possible_code2 == 'j' and not fits32(val2):
                        val2 = self._addr_as_reg_offset(val2)
                        invoke(self, possible_code1 + "m", val1, val2)
                        return
                    if possible_code1 == 'm' and not fits32(val1[1]):
                        val1 = self._fix_static_offset_64_m(val1)
                    if possible_code2 == 'm' and not fits32(val2[1]):
                        val2 = self._fix_static_offset_64_m(val2)
                    if possible_code1 == 'a' and not fits32(val1[3]):
                        val1 = self._fix_static_offset_64_a(val1)
                    if possible_code2 == 'a' and not fits32(val2[3]):
                        val2 = self._fix_static_offset_64_a(val2)
                    invoke(self, possible_code1 + possible_code2, val1, val2)
                    return
    _missing_binary_insn(name, code1, code2)
def insn_with_64_bit_immediate(self, loc1, loc2):
    # These are the worst cases:
    val2 = loc2.value_i()
    if name == 'MOV' and isinstance(loc1, RegLoc):
        self.MOV_ri(loc1.value, val2)
        return
    code1 = loc1.location_code()
    if code1 == 'j':
        checkvalue = loc1.value_j()
    elif code1 == 'm':
        checkvalue = loc1.value_m()[1]
    elif code1 == 'a':
        checkvalue = loc1.value_a()[3]
    else:
        checkvalue = 0
    if not rx86.fits_in_32bits(checkvalue):
        # INSN_ji, and both operands are 64-bit; or INSN_mi or INSN_ai
        # and the constant offset in the address is 64-bit.
        # Hopefully this doesn't happen too often
        freereg = loc1.find_unused_reg()
        self.PUSH_r(freereg.value)
        self.MOV_ri(freereg.value, val2)
        INSN(self, loc1, freereg)
        self.POP_r(freereg.value)
    else:
        # For this case, we should not need the scratch register more than here.
        self._load_scratch(val2)
        INSN(self, loc1, X86_64_SCRATCH_REG)
def _load_scratch(self, value):
    if self._scratch_register_value != -1:
        if self._scratch_register_value == value:
            #print '_load_scratch(%x) [REUSED]' % (value,)
            return
        offset = r_uint(value) - r_uint(self._scratch_register_value)
        offset = intmask(offset)
        if rx86.fits_in_32bits(offset):
            #print '_load_scratch(%x) [LEA r11+%d]' % (value, offset)
            #global COUNT_
            #try:
            #    COUNT_ += 1
            #except NameError:
            #    COUNT_ = 1
            #if COUNT_ % 182 == 0:
            #    import pdb;pdb.set_trace()
            self.LEA_rm(X86_64_SCRATCH_REG.value,
                        (X86_64_SCRATCH_REG.value, offset))
            self._scratch_register_value = value
            return
        #print '_load_scratch(%x) [too far]' % (value,)
    #else:
    #    print '_load_scratch(%x) [new]' % (value,)
    self._scratch_register_value = value
    self.MOV_ri(X86_64_SCRATCH_REG.value, value)
def test_follow_jump_instructions_32():
    buf = lltype.malloc(rffi.CCHARP.TO, 80, flavor='raw')
    raw = rffi.cast(lltype.Signed, buf)
    if not fits_in_32bits(raw):
        lltype.free(buf, flavor='raw')
        py.test.skip("not testable")
    mc = Fake32CodeBlockWrapper()
    mc.WORD = 4
    mc.relocations = []
    mc.RET()
    mc.copy_to_raw_memory(raw)
    mc = Fake32CodeBlockWrapper()
    mc.WORD = 4
    mc.relocations = []
    assert follow_jump(raw) == raw
    mc.JMP(imm(raw))
    mc.copy_to_raw_memory(raw + 20)
    assert buf[20] == '\xE9'    # JMP
    assert buf[21] == '\xE7'    # -25
    assert buf[22] == '\xFF'
    assert buf[23] == '\xFF'
    assert buf[24] == '\xFF'
    mc = Fake32CodeBlockWrapper()
    mc.WORD = 4
    mc.relocations = []
    assert follow_jump(raw + 20) == raw
    mc.JMP(imm(raw))
    mc.copy_to_raw_memory(raw + 40)
    assert buf[40] == '\xE9'    # JMP
    assert buf[41] == '\xD3'    # -45
    assert buf[42] == '\xFF'
    assert buf[43] == '\xFF'
    assert buf[44] == '\xFF'
    assert follow_jump(raw + 40) == raw
    lltype.free(buf, flavor='raw')
def INSN(self, loc):
    code = loc.location_code()
    for possible_code in unrolling_location_codes:
        if code == possible_code:
            val = getattr(loc, "value_" + possible_code)()
            if self.WORD == 8 and possible_code == 'i' and not rx86.fits_in_32bits(val):
                self._load_scratch(val)
                _rx86_getattr(self, name + "_r")(X86_64_SCRATCH_REG.value)
            else:
                methname = name + "_" + possible_code
                _rx86_getattr(self, methname)(val)
def test_bug_setfield_64bit(self):
    if WORD == 4:
        py.test.skip("only for 64 bits")
    TP = lltype.GcStruct('S', ('i', lltype.Signed))
    ofsi = self.cpu.fielddescrof(TP, 'i')
    for i in range(500):
        p = lltype.malloc(TP)
        addr = rffi.cast(lltype.Signed, p)
        if fits_in_32bits(addr):
            break    # fitting in 32 bits, good
    else:
        py.test.skip("cannot get a 32-bit pointer")
    res = ConstPtr(rffi.cast(llmemory.GCREF, addr))
    self.execute_operation(rop.SETFIELD_RAW, [res, ConstInt(3**33)],
                           'void', ofsi)
    assert p.i == 3**33
def _addr_as_reg_offset(self, addr):
    # Encodes a (64-bit) address as an offset from the scratch register.
    # If we are within a "reuse_scratch_register" block, we remember the
    # last value we loaded to the scratch register and encode the address
    # as an offset from that if we can.
    if self._scratch_register_known:
        offset = addr - self._scratch_register_value
        if rx86.fits_in_32bits(offset):
            return (X86_64_SCRATCH_REG.value, offset)
        # else: fall through

    if self._reuse_scratch_register:
        self._scratch_register_known = True
        self._scratch_register_value = addr

    self.MOV_ri(X86_64_SCRATCH_REG.value, addr)
    return (X86_64_SCRATCH_REG.value, 0)
def _addr_as_reg_offset(self, addr):
    # Encodes a (64-bit) address as an offset from the scratch register.
    # If we are within a "reuse_scratch_register" block, we remember the
    # last value we loaded to the scratch register and encode the address
    # as an offset from that if we can.
    if self._scratch_register_value != -1:
        offset = r_uint(addr) - r_uint(self._scratch_register_value)
        offset = intmask(offset)
        if rx86.fits_in_32bits(offset):
            #print '_addr_as_reg_offset(%x) [REUSED r11+%d]' % (
            #    addr, offset)
            return (X86_64_SCRATCH_REG.value, offset)
        #print '_addr_as_reg_offset(%x) [too far]' % (addr,)
        # else: fall through
    #else:
    #    print '_addr_as_reg_offset(%x) [new]' % (addr,)
    self._scratch_register_value = addr
    self.MOV_ri(X86_64_SCRATCH_REG.value, addr)
    return (X86_64_SCRATCH_REG.value, 0)
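# --- Illustration (not part of the original sources) ---
# Both _load_scratch() and _addr_as_reg_offset() above reuse the value already
# cached in the scratch register (r11) whenever the wrapped-around difference
# between the new 64-bit value and the cached one fits in a signed 32-bit
# displacement.  Below is a minimal plain-Python sketch of that decision, with
# hypothetical stand-ins for rx86.fits_in_32bits and the r_uint/intmask
# wraparound arithmetic from rpython.rlib.rarithmetic.

def fits_in_32bits(value):
    # True if 'value' can be encoded as a signed 32-bit immediate/displacement.
    return -2147483648 <= value <= 2147483647

def offset_from_scratch(new_value, cached_value, word_bits=64):
    # Emulate r_uint(new_value) - r_uint(cached_value) followed by intmask():
    # subtract with wraparound, then reinterpret as a signed machine word.
    diff = (new_value - cached_value) & ((1 << word_bits) - 1)
    if diff >= 1 << (word_bits - 1):
        diff -= 1 << word_bits
    return diff

# Usage: decide between a cheap LEA r11, [r11+offset] and a full MOV r11, imm64.
base = 0x7f0000000000
assert fits_in_32bits(offset_from_scratch(base + 0x1000, base))         # reuse
assert not fits_in_32bits(offset_from_scratch(base + (1 << 40), base))  # reload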
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                if not self.result_value_saved_early:
                    mc.MOV_sr(12, edx.value)
                    restore_edx = True
            css_value = edx
            old_value = ecx
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    # Use XCHG as an atomic test-and-set-lock.  It also implicitly
    # does a memory barrier.
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap) and gcrootmap.is_shadow_stack:
        from rpython.jit.backend.x86.assembler import heap
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il8(rx86.Conditions['NE'], 0)
        jne_location = mc.get_relative_pos()
        # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
        # state before the XCHG, but the XCHG acquired it by writing 1)
        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        mc.J_il8(rx86.Conditions['E'], 0)
        je_location = mc.get_relative_pos()
        # revert the rpy_fastgil acquired above, so that the
        # general 'reacqgil_addr' below can acquire it again...
        mc.MOV(heap(fastgil), ecx)
        # patch the JNE above
        offset = mc.get_relative_pos() - jne_location
        assert 0 < offset <= 127
        mc.overwrite(jne_location - 1, chr(offset))
    else:
        mc.J_il8(rx86.Conditions['E'], 0)
        je_location = mc.get_relative_pos()
    #
    # Yes, we need to call the reacqgil() function
    if not self.result_value_saved_early:
        self.save_result_value(save_edx=False)
    if self.asm._is_asmgcc():
        if IS_X86_32:
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(follow_jump(self.asm.reacqgil_addr)))
    if not self.result_value_saved_early:
        self.restore_result_value(save_edx=False)
    #
    # patch the JE above
    offset = mc.get_relative_pos() - je_location
    assert 0 < offset <= 127
    mc.overwrite(je_location - 1, chr(offset))
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():        # for testing: now we can access
        mc.SUB(ebp, imm(1))            # ebp again
    #
    # Now that we have reacquired the GIL, we can reload a possibly
    # modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL)
    mc = self.mc
    restore_edx = False
    #
    # Make sure we can use 'eax' in the sequel for CMPXCHG.
    # On 32-bit, we also need to check if restype is 'L' for long long,
    # in which case we need to save eax and edx because they are both
    # used for the return value.
    if self.restype in (INT, 'L') and not self.result_value_saved_early:
        self.save_result_value(save_edx=self.restype == 'L')
        self.result_value_saved_early = True
    #
    # Use LOCK CMPXCHG as a compare-and-swap with memory barrier.
    tlsreg = self.get_tlofs_reg()
    thread_ident_ofs = lltls.get_thread_ident_offset(self.asm.cpu)
    #
    mc.MOV_rm(ecx.value, (tlsreg.value, thread_ident_ofs))
    mc.XOR_rr(eax.value, eax.value)
    if rx86.fits_in_32bits(fastgil):
        mc.LOCK()
        mc.CMPXCHG_jr(fastgil, ecx.value)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.LOCK()
        mc.CMPXCHG_mr((X86_64_SCRATCH_REG.value, 0), ecx.value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap):
        from rpython.jit.backend.x86.assembler import heap
        assert gcrootmap.is_shadow_stack
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il(rx86.Conditions['NZ'], 0xfffff)     # patched later
        early_jump_addr = mc.get_relative_pos(break_basic_block=False)
        # ^^^ this jump will go to almost the same place as the
        # ReacqGilSlowPath() computes, but one instruction further,
        # i.e. just after the "MOV(heap(fastgil), 0)".
        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.early_jump_addr = early_jump_addr
        sp.fastgil = fastgil
    else:
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NZ'])
    sp.callbuilder = self
    sp.set_continue_addr(mc)
    self.asm.pending_slowpaths.append(sp)
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():        # for testing: now we can access
        mc.SUB(ebp, imm(1))            # ebp again
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                mc.MOV_sr(12, edx.value)
                restore_edx = True
            css_value = edx
            old_value = ecx
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    mc.J_il8(rx86.Conditions['E'], 0)
    je_location = mc.get_relative_pos()
    #
    # Yes, we need to call the reacqgil() function
    self.save_result_value_reacq()
    if self.asm._is_asmgcc():
        if IS_X86_32:
            mc.MOV_sr(4, old_value.value)
            mc.MOV_sr(0, css_value.value)
        # on X86_64, they are already in the right registers
    mc.CALL(imm(self.asm.reacqgil_addr))
    self.restore_result_value_reacq()
    #
    # patch the JE above
    offset = mc.get_relative_pos() - je_location
    assert 0 < offset <= 127
    mc.overwrite(je_location - 1, chr(offset))
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if not we_are_translated():        # for testing: now we can access
        mc.SUB(ebp, imm(1))            # ebp again
    #
    # Now that we have reacquired the GIL, we can reload a possibly
    # modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def move_real_result_and_call_reacqgil_addr(self, fastgil):
    from rpython.jit.backend.x86 import rx86
    #
    # check if we need to call the reacqgil() function or not
    # (to acquire the GIL, remove the asmgcc head from
    # the chained list, etc.)
    mc = self.mc
    restore_edx = False
    if not self.asm._is_asmgcc():
        css = 0
        css_value = imm(0)
        old_value = ecx
    else:
        from rpython.memory.gctransform import asmgcroot
        css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
        if IS_X86_32:
            assert css >= 16
            if self.restype == 'L':    # long long result: eax/edx
                if not self.result_value_saved_early:
                    mc.MOV_sr(12, edx.value)
                    restore_edx = True
            css_value = edx    # note: duplicated in ReacqGilSlowPath
            old_value = ecx    #
        elif IS_X86_64:
            css_value = edi
            old_value = esi
        mc.LEA_rs(css_value.value, css)
    #
    # Use XCHG as an atomic test-and-set-lock.  It also implicitly
    # does a memory barrier.
    mc.MOV(old_value, imm(1))
    if rx86.fits_in_32bits(fastgil):
        mc.XCHG_rj(old_value.value, fastgil)
    else:
        mc.MOV_ri(X86_64_SCRATCH_REG.value, fastgil)
        mc.XCHG_rm(old_value.value, (X86_64_SCRATCH_REG.value, 0))
    mc.CMP(old_value, css_value)
    #
    gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
    if bool(gcrootmap) and gcrootmap.is_shadow_stack:
        from rpython.jit.backend.x86.assembler import heap
        #
        # When doing a call_release_gil with shadowstack, there
        # is the risk that the 'rpy_fastgil' was free but the
        # current shadowstack can be the one of a different
        # thread.  So here we check if the shadowstack pointer
        # is still the same as before we released the GIL (saved
        # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
        mc.J_il(rx86.Conditions['NE'], 0xfffff)     # patched later
        early_jump_addr = mc.get_relative_pos(break_basic_block=False)
        # ^^^ this jump will go to almost the same place as the
        # ReacqGilSlowPath() computes, but one instruction farther,
        # i.e. just after the "MOV(heap(fastgil), ecx)".
        # here, ecx (=old_value) is zero (so rpy_fastgil was in 'released'
        # state before the XCHG, but the XCHG acquired it by writing 1)
        rst = gcrootmap.get_root_stack_top_addr()
        mc = self.mc
        mc.CMP(ebx, heap(rst))
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
        sp.early_jump_addr = early_jump_addr
        sp.fastgil = fastgil
    else:
        sp = self.ReacqGilSlowPath(mc, rx86.Conditions['NE'])
    sp.callbuilder = self
    sp.set_continue_addr(mc)
    self.asm.pending_slowpaths.append(sp)
    #
    if restore_edx:
        mc.MOV_rs(edx.value, 12)   # restore this
    #
    if self.result_value_saved_early:
        self.restore_result_value(save_edx=True)
    #
    if not we_are_translated():        # for testing: now we can access
        mc.SUB(ebp, imm(1))            # ebp again
    #
    # Now that we have reacquired the GIL, we can reload a possibly
    # modified ebp
    if self.asm._is_asmgcc():
        # special-case: reload ebp from the css
        from rpython.memory.gctransform import asmgcroot
        index_of_ebp = css + WORD * (2 + asmgcroot.INDEX_OF_EBP)
        mc.MOV_rs(ebp.value, index_of_ebp)  # MOV EBP, [css.ebp]
def INSN(self, loc1, loc2):
    code1 = loc1.location_code()
    code2 = loc2.location_code()

    # You cannot pass in the scratch register as a location,
    # except with a MOV instruction.
    if name.startswith('MOV'):
        if loc2 is X86_64_SCRATCH_REG:
            assert code1 != 'j' and code1 != 'm' and code1 != 'a'
        if loc1 is X86_64_SCRATCH_REG:
            self.forget_scratch_register()
    elif loc1 is X86_64_SCRATCH_REG or loc2 is X86_64_SCRATCH_REG:
        raise AssertionError("%s with scratch reg specified" % name)

    for possible_code2 in unrolling_location_codes:
        if not has_implementation_for('?', possible_code2):
            continue
        if code2 == possible_code2:
            val2 = getattr(loc2, "value_" + possible_code2)()
            #
            # Fake out certain operations for x86_64
            if self.WORD == 8 and possible_code2 == 'i' and not rx86.fits_in_32bits(val2):
                if insn_with_64_bit_immediate(self, loc1, loc2):
                    return      # done
                loc2 = X86_64_SCRATCH_REG
                code2 = 'r'
                # NB. unrolling_location_codes contains 'r'
                # after 'i', so that it will be found after
                # this iteration
                continue
            #
            # Regular case
            for possible_code1 in unrolling_location_codes:
                if not has_implementation_for(possible_code1,
                                              possible_code2):
                    continue
                if code1 == possible_code1:
                    val1 = getattr(loc1, "value_" + possible_code1)()
                    # More faking out of certain operations for x86_64
                    fits32 = rx86.fits_in_32bits
                    if possible_code1 == 'j' and not fits32(val1):
                        val1 = self._addr_as_reg_offset(val1)
                        invoke(self, "m" + possible_code2, val1, val2)
                        return
                    if possible_code2 == 'j' and not fits32(val2):
                        val2 = self._addr_as_reg_offset(val2)
                        invoke(self, possible_code1 + "m", val1, val2)
                        return
                    if possible_code1 == 'm' and not fits32(val1[1]):
                        val1 = self._fix_static_offset_64_m(val1)
                    if possible_code2 == 'm' and not fits32(val2[1]):
                        val2 = self._fix_static_offset_64_m(val2)
                    if possible_code1 == 'a' and not fits32(val1[3]):
                        val1 = self._fix_static_offset_64_a(val1)
                    if possible_code2 == 'a' and not fits32(val2[3]):
                        val2 = self._fix_static_offset_64_a(val2)
                    invoke(self, possible_code1 + possible_code2, val1, val2)
                    return
    _missing_binary_insn(name, code1, code2)
def consider_int_sub(self, op, position):
    y = op.getarg(1)
    if isinstance(y, ConstInt) and rx86.fits_in_32bits(-y.value):
        pass    # nothing to be hinted
    else:
        self._consider_binop(op, position)