def test_free_with_alignment():
    """Check that free_asm_block() drops the traceback entries of a codemap.

    A codemap with two nested portal frames is registered, looked up, and
    then freed with the address range 190..310 (per the original comment,
    "a bit larger" than what was registered).  After that, lookups at the
    same addresses must return empty tracebacks.
    """
    codemap = CodemapStorage()
    codemap.setup()
    try:
        builder = CodemapBuilder()
        builder.enter_portal_frame(23, 34, 0)
        builder.enter_portal_frame(45, 56, 20)
        codemap.register_codemap(builder.get_final_bytecode(200, 100))
        assert unpack_traceback(215) == [34]
        assert unpack_traceback(225) == [34, 56]
        codemap.free_asm_block(190, 310)  # a bit larger
        assert unpack_traceback(215) == []
        assert unpack_traceback(225) == []
    finally:
        # always release the storage, even when an assertion above fails,
        # so that a failure here cannot leak into later tests
        codemap.free()
class BaseAssembler(object):
    """ Base class for Assembler generator in real backends

    Concrete backends are expected to provide the hooks used below
    (the _build_*() methods, build_frame_realloc_slowpath(),
    the _call_assembler_*() methods, imm(), new_stack_loc(), the 'mc'
    attribute, ...); none of them is defined in this class.
    """

    def __init__(self, cpu, translate_support_code=False):
        """Remember 'cpu' and reset the state filled in by setup_once()."""
        self.cpu = cpu
        self.memcpy_addr = 0
        self.memset_addr = 0
        self.rtyper = cpu.rtyper
        self._debug = False
        # filled by _register_counter() and read by finish_once(); it is
        # created here so that both work even if the subclass does not
        # create the list itself
        self.loop_run_counters = []

    def stitch_bridge(self, faildescr, target):
        """Backend hook; must be overridden."""
        raise NotImplementedError

    def setup_once(self):
        """One-time initialization: read the GC parameters and build all
        the shared slow paths through the backend's _build_*() hooks.
        """
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
            self.gc_minimal_size_in_nursery = gc_ll_descr.minimal_size_in_nursery
        else:
            self.gc_minimal_size_in_nursery = 0
        if hasattr(gc_ll_descr, 'gcheaderbuilder'):
            self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
        else:
            self.gc_size_of_header = WORD # for tests
        self.memcpy_addr = rffi.cast(lltype.Signed, memcpy_fn)
        self.memset_addr = rffi.cast(lltype.Signed, memset_fn)
        self._build_failure_recovery(False, withfloats=False)
        self._build_failure_recovery(True, withfloats=False)
        self._build_wb_slowpath(False)
        self._build_wb_slowpath(True)
        self._build_wb_slowpath(False, for_frame=True)
        # only one of those
        self.build_frame_realloc_slowpath()
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            self._build_wb_slowpath(False, withfloats=True)
            self._build_wb_slowpath(True, withfloats=True)
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            # generate few slowpaths for various cases
            self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
            self.malloc_slowpath_varsize = self._build_malloc_slowpath(
                kind='var')
        if hasattr(gc_ll_descr, 'malloc_str'):
            self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
        else:
            self.malloc_slowpath_str = None
        if hasattr(gc_ll_descr, 'malloc_unicode'):
            self.malloc_slowpath_unicode = self._build_malloc_slowpath(
                kind='unicode')
        else:
            self.malloc_slowpath_unicode = None
        # entries [2] and [3] are the variants built with a True first
        # argument; they stay 0 when the cpu does not support floats
        lst = [0, 0, 0, 0]
        lst[0] = self._build_cond_call_slowpath(False, False)
        lst[1] = self._build_cond_call_slowpath(False, True)
        if self.cpu.supports_floats:
            lst[2] = self._build_cond_call_slowpath(True, False)
            lst[3] = self._build_cond_call_slowpath(True, True)
        self.cond_call_slowpath = lst

        self._build_stack_check_slowpath()
        self._build_release_gil(gc_ll_descr.gcrootmap)
        if not self._debug:
            # if self._debug is already set it means that someone called
            # set_debug by hand before initializing the assembler. Leave it
            # as it is
            self.set_debug(have_debug_prints_for('jit-backend-counts'))
        # when finishing, we only have one value at [0], the rest dies
        self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1,
                                              flavor='raw',
                                              track_allocation=False)
        self.gcmap_for_finish[0] = r_uint(1)

    def setup(self, looptoken):
        """Per-compilation setup: fresh codemap builder (if the cpu has
        codemaps) and a null '_finish_gcmap'.  'looptoken' is unused here.
        """
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder = CodemapBuilder()
        self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)

    def set_debug(self, v):
        """Set the debug flag to 'v' and return its previous value."""
        r = self._debug
        self._debug = v
        return r

    def rebuild_faillocs_from_descr(self, descr, inputargs):
        """Decode 'descr.rd_locs' (as written by store_info_on_descr())
        back into a list of location objects.

        Entries equal to 0xFFFF are skipped.  Smaller values index
        cpu.gen_regs, then cpu.float_regs; anything above is a frame
        position, typed after the matching entry of 'inputargs'.
        """
        locs = []
        GPR_REGS = len(self.cpu.gen_regs)
        XMM_REGS = len(self.cpu.float_regs)
        input_i = 0
        # a float register takes two position numbers when not on 64-bit
        if self.cpu.IS_64_BIT:
            coeff = 1
        else:
            coeff = 2
        for pos in descr.rd_locs:
            pos = rffi.cast(lltype.Signed, pos)
            if pos == 0xFFFF:
                continue
            elif pos < GPR_REGS:
                locs.append(self.cpu.gen_regs[pos])
            elif pos < GPR_REGS + XMM_REGS * coeff:
                pos = (pos - GPR_REGS) // coeff
                locs.append(self.cpu.float_regs[pos])
            else:
                i = pos - self.cpu.JITFRAME_FIXED_SIZE
                assert i >= 0
                tp = inputargs[input_i].type
                locs.append(self.new_stack_loc(i, tp))
            # 'inputargs' only has entries for the non-skipped positions
            input_i += 1
        return locs

    # last 'positions' list built by store_info_on_descr().  The list is
    # never mutated in place once stored (it is copied before the first
    # differing write), so this class-level default is safe to share.
    _previous_rd_locs = []

    def store_info_on_descr(self, startspos, guardtok):
        """Encode 'guardtok.fail_locs' as a list of USHORT positions and
        store it on 'guardtok.faildescr.rd_locs'.

        Returns (fail_descr, target): the faildescr cast to a Signed
        integer, and the entry of self.failure_recovery_code selected by
        'exc + 2 * withfloats'.  'startspos' is unused here.
        """
        withfloats = False
        for box in guardtok.failargs:
            if box is not None and \
               (box.type == FLOAT or box.type == VECTOR):
                withfloats = True
                break
        exc = guardtok.must_save_exception()
        target = self.failure_recovery_code[exc + 2 * withfloats]
        fail_descr = cast_instance_to_gcref(guardtok.faildescr)
        fail_descr = rffi.cast(lltype.Signed, fail_descr)
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        #
        # in practice, about 2/3rd of 'positions' lists that we build are
        # exactly the same as the previous one, so share the lists to
        # conserve memory
        if len(self._previous_rd_locs) == len(guardtok.fail_locs):
            positions = self._previous_rd_locs     # tentatively
            shared = True
        else:
            positions = [rffi.cast(rffi.USHORT, 0)] * len(guardtok.fail_locs)
            shared = False
        #
        for i, loc in enumerate(guardtok.fail_locs):
            if loc is None:
                position = 0xFFFF
            elif loc.is_stack():
                assert (loc.value & (WORD - 1)) == 0, \
                    "store_info_on_descr: misaligned"
                position = (loc.value - base_ofs) // WORD
                assert 0 < position < 0xFFFF, "store_info_on_descr: overflow!"
            else:
                assert loc is not self.cpu.frame_reg # for now
                if self.cpu.IS_64_BIT:
                    coeff = 1
                else:
                    coeff = 2
                if loc.is_float():
                    position = len(self.cpu.gen_regs) + loc.value * coeff
                else:
                    position = self.cpu.all_reg_indexes[loc.value]

            if shared:
                if (rffi.cast(lltype.Signed, self._previous_rd_locs[i]) ==
                    rffi.cast(lltype.Signed, position)):
                    continue   # still equal
                # first difference: stop sharing and work on a copy
                positions = positions[:]
                shared = False
            positions[i] = rffi.cast(rffi.USHORT, position)
        self._previous_rd_locs = positions
        # write down the positions of locs
        guardtok.faildescr.rd_locs = positions
        return fail_descr, target

    def enter_portal_frame(self, op):
        """Record the two int arguments of 'op' and the current relative
        code position in the codemap builder (only if the cpu has
        codemaps).
        """
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.enter_portal_frame(op.getarg(0).getint(),
                                                    op.getarg(1).getint(),
                                                    self.mc.get_relative_pos())

    def leave_portal_frame(self, op):
        """Counterpart of enter_portal_frame(): record the first int
        argument of 'op' and the current relative code position.
        """
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.leave_portal_frame(op.getarg(0).getint(),
                                                    self.mc.get_relative_pos())

    def call_assembler(self, op, argloc, vloc, result_loc, tmploc):
        """
            * argloc: location of the frame argument that we're passing to
                      the called assembler (this is the first return value
                      of locs_for_call_assembler())

            * vloc: location of the virtualizable (not in a register;
                    this is the optional second return value of
                    locs_for_call_assembler(), or imm(0) if none returned)

            * result_loc: location of op.result (which is not to be
                          confused with the next one)

            * tmploc: location where the actual call to the other piece
                      of assembler will return its jitframe result
                      (which is always a REF), before the helper may be
                      called
        """
        descr = op.getdescr()
        assert isinstance(descr, JitCellToken)
        #
        # Write a call to the target assembler
        # we need to allocate the frame, keep in sync with runner's
        # execute_token
        jd = descr.outermost_jitdriver_sd
        self._call_assembler_emit_call(self.imm(descr._ll_function_addr),
                                        argloc, tmploc)

        if op.type == 'v':
            assert result_loc is None
            value = self.cpu.done_with_this_frame_descr_void
        else:
            kind = op.type
            if kind == INT:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_int
            elif kind == REF:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_ref
            elif kind == FLOAT:
                value = self.cpu.done_with_this_frame_descr_float
            else:
                raise AssertionError(kind)

        gcref = cast_instance_to_gcref(value)
        if gcref:
            rgc._make_sure_does_not_move(gcref)
        value = rffi.cast(lltype.Signed, gcref)
        je_location = self._call_assembler_check_descr(value, tmploc)
        #
        # Path A: use assembler_helper_adr
        assert jd is not None
        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)

        self._call_assembler_emit_helper_call(self.imm(asm_helper_adr),
                                                [tmploc, vloc], result_loc)

        jmp_location = self._call_assembler_patch_je(result_loc, je_location)

        # Path B: fast path.  Must load the return value
        #
        self._call_assembler_load_result(op, result_loc)
        #
        # Here we join Path A and Path B again
        self._call_assembler_patch_jmp(jmp_location)

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        """When debugging is enabled, return a copy of 'operations' with
        a counter increment inserted at the start and after every LABEL;
        otherwise return 'operations' unchanged.
        """
        if self._debug:
            newoperations = []
            self._append_debugging_code(newoperations, tp, number,
                                        None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    def _append_debugging_code(self, operations, tp, number, token):
        """Register a new counter and append to 'operations' an
        INCREMENT_DEBUG_COUNTER operation on its address.
        """
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        operations.append(
            ResOperation(rop.INCREMENT_DEBUG_COUNTER, [c_adr], None))

    def _register_counter(self, tp, number, token):
        """Allocate a raw DEBUG_COUNTER, append it to
        self.loop_run_counters and return it.

        For tp 'b' or 'e' the counter is numbered with 'number';
        otherwise 'token' is required and its unique id is used.
        """
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process

        # XXX the numbers here are ALMOST unique, but not quite, use a counter
        #     or something
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        self.loop_run_counters.append(struct)
        return struct

    def finish_once(self):
        """When debugging is enabled, print every registered counter
        inside a 'jit-backend-counts' debug section.
        """
        if self._debug:
            debug_start('jit-backend-counts')
            for i in range(len(self.loop_run_counters)):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                else:
                    num = struct.number
                    if num == -1:
                        num = '-1'
                    else:
                        num = str(r_uint(num))
                    if struct.type == 'b':
                        prefix = 'bridge %s' % num
                    else:
                        prefix = 'entry %s' % num
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css, old_rpy_fastgil):
        # Before doing an external call, 'rpy_fastgil' is initialized to
        # be equal to css.  This function is called if we find out after
        # the call that it is no longer equal to css.  See description
        # in translator/c/src/thread_pthread.c.

        if old_rpy_fastgil == 0:
            # this case occurs if some other thread stole the GIL but
            # released it again.  What occurred here is that we changed
            # 'rpy_fastgil' from 0 to 1, thus successfully reacquiring the
            # GIL.
            pass

        elif old_rpy_fastgil == 1:
            # 'rpy_fastgil' was (and still is) locked by someone else.
            # We need to wait for the regular mutex.
            after = rffi.aroundstate.after
            if after:
                after()

        else:
            # stole the GIL from a different thread that is also
            # currently in an external call from the jit.  Attach
            # the 'old_rpy_fastgil' into the chained list.
            from rpython.memory.gctransform import asmgcroot
            oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil)
            next = asmgcroot.gcrootanchor.next
            oth.next = next
            oth.prev = asmgcroot.gcrootanchor
            asmgcroot.gcrootanchor.next = oth
            next.prev = oth

        # similar to trackgcroot.py:pypy_asm_stackwalk, second part:
        # detach the 'css' from the chained list
        from rpython.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode,
        # 'rpy_fastgil' contains only zero or non-zero, and this is only
        # called when the old value stored in 'rpy_fastgil' was non-zero
        # (i.e. still locked, must wait with the regular mutex)
        after = rffi.aroundstate.after
        if after:
            after()

    # low-level signatures of the two helpers above
    _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed],
                                                 lltype.Void))

    def _build_release_gil(self, gcrootmap):
        """Store in self.reacqgil_addr the address of the GIL reacquire
        helper: the shadowstack one if 'gcrootmap' is None or is a shadow
        stack, else the asmgcc one.
        """
        if gcrootmap is None or gcrootmap.is_shadow_stack:
            reacqgil_func = llhelper(self._REACQGIL0_FUNC,
                                     self._reacquire_gil_shadowstack)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
        else:
            reacqgil_func = llhelper(self._REACQGIL2_FUNC,
                                     self._reacquire_gil_asmgcc)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)

    def _is_asmgcc(self):
        """True if there is a gcrootmap and it is not a shadow stack."""
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        return bool(gcrootmap) and not gcrootmap.is_shadow_stack
def setup(self, looptoken):
    """Per-compilation setup.

    Creates a fresh CodemapBuilder when the cpu has codemaps, then
    resets '_finish_gcmap' to a null GCMAP pointer.  'looptoken' is
    not used here.
    """
    cpu = self.cpu
    if cpu.HAS_CODEMAP:
        builder = CodemapBuilder()
        self.codemap_builder = builder
    null_gcmap = lltype.nullptr(jitframe.GCMAP)
    self._finish_gcmap = null_gcmap
class BaseAssembler(object):
    """ Base class for Assembler generator in real backends

    Concrete backends are expected to provide the hooks used below
    (the _build_*() methods, build_frame_realloc_slowpath(),
    the _call_assembler_*() methods, imm(), new_stack_loc(), the 'mc'
    attribute, ...); none of them is defined in this class.
    """

    def __init__(self, cpu, translate_support_code=False):
        """Remember 'cpu' and reset the state and statistics counters."""
        self.cpu = cpu
        self.memcpy_addr = 0
        self.memset_addr = 0
        self.rtyper = cpu.rtyper
        # do not rely on this attribute if you test for jitlog
        self._debug = False
        # filled by _register_counter()
        self.loop_run_counters = []

        # XXX register allocation statistics to be removed later
        self.num_moves_calls = 0
        self.num_moves_jump = 0
        self.num_spills = 0
        self.num_spills_to_existing = 0
        self.num_reloads = 0

        self.preamble_num_moves_calls = 0
        self.preamble_num_moves_jump = 0
        self.preamble_num_spills = 0
        self.preamble_num_spills_to_existing = 0
        self.preamble_num_reloads = 0

    def stitch_bridge(self, faildescr, target):
        """Backend hook; must be overridden."""
        raise NotImplementedError

    def setup_once(self):
        """One-time initialization: read the GC parameters and build all
        the shared slow paths through the backend's _build_*() hooks.
        """
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
            self.gc_minimal_size_in_nursery = gc_ll_descr.minimal_size_in_nursery
        else:
            self.gc_minimal_size_in_nursery = 0
        if hasattr(gc_ll_descr, 'gcheaderbuilder'):
            self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
        else:
            self.gc_size_of_header = WORD # for tests
        self.memcpy_addr = rffi.cast(lltype.Signed, memcpy_fn)
        self.memset_addr = rffi.cast(lltype.Signed, memset_fn)
        self._build_failure_recovery(False, withfloats=False)
        self._build_failure_recovery(True, withfloats=False)
        self._build_wb_slowpath(False)
        self._build_wb_slowpath(True)
        self._build_wb_slowpath(False, for_frame=True)
        # only one of those
        self.build_frame_realloc_slowpath()
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            self._build_wb_slowpath(False, withfloats=True)
            self._build_wb_slowpath(True, withfloats=True)
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            # generate few slowpaths for various cases
            self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
            self.malloc_slowpath_varsize = self._build_malloc_slowpath(
                kind='var')
        if hasattr(gc_ll_descr, 'malloc_str'):
            self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
        else:
            self.malloc_slowpath_str = None
        if hasattr(gc_ll_descr, 'malloc_unicode'):
            self.malloc_slowpath_unicode = self._build_malloc_slowpath(
                kind='unicode')
        else:
            self.malloc_slowpath_unicode = None
        # entries [2] and [3] are the variants built with a True first
        # argument; they stay 0 when the cpu does not support floats
        lst = [0, 0, 0, 0]
        lst[0] = self._build_cond_call_slowpath(False, False)
        lst[1] = self._build_cond_call_slowpath(False, True)
        if self.cpu.supports_floats:
            lst[2] = self._build_cond_call_slowpath(True, False)
            lst[3] = self._build_cond_call_slowpath(True, True)
        self.cond_call_slowpath = lst

        self._build_stack_check_slowpath()
        self._build_release_gil(gc_ll_descr.gcrootmap)
        # do not rely on the attribute _debug for jitlog
        if not self._debug:
            # if self._debug is already set it means that someone called
            # set_debug by hand before initializing the assembler. Leave it
            # as it is
            should_debug = have_debug_prints_for('jit-backend-counts')
            self.set_debug(should_debug)
        # when finishing, we only have one value at [0], the rest dies
        self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1,
                                              flavor='raw',
                                              track_allocation=False)
        self.gcmap_for_finish[0] = r_uint(1)

    def setup(self, looptoken):
        """Per-compilation setup: fresh codemap builder (if the cpu has
        codemaps) and a null '_finish_gcmap'.  'looptoken' is unused here.
        """
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder = CodemapBuilder()
        self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)

    def setup_gcrefs_list(self, allgcrefs):
        """Install 'allgcrefs' as the list searched by
        get_gcref_from_faildescr() and restart the search at index 0.
        """
        self._allgcrefs = allgcrefs
        self._allgcrefs_faildescr_next = 0

    def teardown_gcrefs_list(self):
        """Drop the reference to the list given to setup_gcrefs_list()."""
        self._allgcrefs = None

    def get_gcref_from_faildescr(self, descr):
        """This assumes that it is called in order for all faildescrs."""
        search = cast_instance_to_gcref(descr)
        # the scan resumes where the previous call stopped and never goes
        # backwards, hence the ordering requirement above
        while not _safe_eq(
                self._allgcrefs[self._allgcrefs_faildescr_next], search):
            self._allgcrefs_faildescr_next += 1
            assert self._allgcrefs_faildescr_next < len(self._allgcrefs)
        return self._allgcrefs_faildescr_next

    def get_asmmemmgr_blocks(self, looptoken):
        """Return the 'asmmemmgr_blocks' list of the looptoken's
        compiled_loop_token, creating an empty one if it is None.
        """
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_blocks is None:
            clt.asmmemmgr_blocks = []
        return clt.asmmemmgr_blocks

    def get_asmmemmgr_gcreftracers(self, looptoken):
        """Return the 'asmmemmgr_gcreftracers' list of the looptoken's
        compiled_loop_token, creating an empty one if it is None.
        """
        clt = looptoken.compiled_loop_token
        if clt.asmmemmgr_gcreftracers is None:
            clt.asmmemmgr_gcreftracers = []
        return clt.asmmemmgr_gcreftracers

    def set_debug(self, v):
        """Set the debug flag to 'v' and return its previous value."""
        r = self._debug
        self._debug = v
        return r

    def rebuild_faillocs_from_descr(self, descr, inputargs):
        """Decode 'descr.rd_locs' (as written by store_info_on_descr())
        back into a list of location objects.

        Entries equal to 0xFFFF are skipped.  Smaller values index
        cpu.gen_regs, then cpu.float_regs; anything above is a frame
        position, typed after the matching entry of 'inputargs'.
        """
        locs = []
        GPR_REGS = len(self.cpu.gen_regs)
        XMM_REGS = len(self.cpu.float_regs)
        input_i = 0
        # a float register takes two position numbers when not on 64-bit
        if self.cpu.IS_64_BIT:
            coeff = 1
        else:
            coeff = 2
        for pos in descr.rd_locs:
            pos = rffi.cast(lltype.Signed, pos)
            if pos == 0xFFFF:
                continue
            elif pos < GPR_REGS:
                locs.append(self.cpu.gen_regs[pos])
            elif pos < GPR_REGS + XMM_REGS * coeff:
                pos = (pos - GPR_REGS) // coeff
                locs.append(self.cpu.float_regs[pos])
            else:
                i = pos - self.cpu.JITFRAME_FIXED_SIZE
                assert i >= 0
                tp = inputargs[input_i].type
                locs.append(self.new_stack_loc(i, tp))
            # 'inputargs' only has entries for the non-skipped positions
            input_i += 1
        return locs

    # last 'positions' list built by store_info_on_descr(); the stored
    # list is copied before the first differing write, never mutated
    _previous_rd_locs = []

    def store_info_on_descr(self, startspos, guardtok):
        """Encode 'guardtok.fail_locs' as a list of USHORT positions and
        store it on 'guardtok.faildescr.rd_locs'.

        Returns (faildescrindex, target): 'guardtok.faildescrindex' and
        the entry of self.failure_recovery_code selected by
        'exc + 2 * withfloats'.  'startspos' is unused here.
        """
        withfloats = False
        for box in guardtok.failargs:
            if box is not None and \
               (box.type == FLOAT or box.type == VECTOR):
                withfloats = True
                break
        exc = guardtok.must_save_exception()
        target = self.failure_recovery_code[exc + 2 * withfloats]
        faildescrindex = guardtok.faildescrindex
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        #
        # in practice, about 2/3rd of 'positions' lists that we build are
        # exactly the same as the previous one, so share the lists to
        # conserve memory
        if len(self._previous_rd_locs) == len(guardtok.fail_locs):
            positions = self._previous_rd_locs     # tentatively
            shared = True
        else:
            positions = [rffi.cast(rffi.USHORT, 0)] * len(guardtok.fail_locs)
            shared = False
        #
        for i, loc in enumerate(guardtok.fail_locs):
            if loc is None:
                position = 0xFFFF
            elif loc.is_stack():
                assert (loc.value & (WORD - 1)) == 0, \
                    "store_info_on_descr: misaligned"
                position = (loc.value - base_ofs) // WORD
                assert 0 < position < 0xFFFF, "store_info_on_descr: overflow!"
            else:
                assert loc is not self.cpu.frame_reg # for now
                if self.cpu.IS_64_BIT:
                    coeff = 1
                else:
                    coeff = 2
                if loc.is_float():
                    position = len(self.cpu.gen_regs) + loc.value * coeff
                else:
                    position = self.cpu.all_reg_indexes[loc.value]

            if shared:
                if (rffi.cast(lltype.Signed, self._previous_rd_locs[i]) ==
                    rffi.cast(lltype.Signed, position)):
                    continue   # still equal
                # first difference: stop sharing and work on a copy
                positions = positions[:]
                shared = False
            positions[i] = rffi.cast(rffi.USHORT, position)
        self._previous_rd_locs = positions
        # write down the positions of locs
        guardtok.faildescr.rd_locs = positions
        return faildescrindex, target

    def enter_portal_frame(self, op):
        """Record the two int arguments of 'op' and the current relative
        code position in the codemap builder (only if the cpu has
        codemaps).
        """
        if self.cpu.HAS_CODEMAP:
            pos = self.mc.get_relative_pos(break_basic_block=False)
            self.codemap_builder.enter_portal_frame(op.getarg(0).getint(),
                                                    op.getarg(1).getint(),
                                                    pos)

    def leave_portal_frame(self, op):
        """Counterpart of enter_portal_frame(): record the first int
        argument of 'op' and the current relative code position.
        """
        if self.cpu.HAS_CODEMAP:
            pos = self.mc.get_relative_pos(break_basic_block=False)
            self.codemap_builder.leave_portal_frame(op.getarg(0).getint(),
                                                    pos)

    def call_assembler(self, op, argloc, vloc, result_loc, tmploc):
        """
            * argloc: location of the frame argument that we're passing to
                      the called assembler (this is the first return value
                      of locs_for_call_assembler())

            * vloc: location of the virtualizable (not in a register;
                    this is the optional second return value of
                    locs_for_call_assembler(), or imm(0) if none returned)

            * result_loc: location of op.result (which is not to be
                          confused with the next one)

            * tmploc: location where the actual call to the other piece
                      of assembler will return its jitframe result
                      (which is always a REF), before the helper may be
                      called
        """
        descr = op.getdescr()
        assert isinstance(descr, JitCellToken)
        #
        # Write a call to the target assembler
        # we need to allocate the frame, keep in sync with runner's
        # execute_token
        jd = descr.outermost_jitdriver_sd
        self._call_assembler_emit_call(self.imm(descr._ll_function_addr),
                                        argloc, tmploc)

        if op.type == 'v':
            assert result_loc is None
            value = self.cpu.done_with_this_frame_descr_void
        else:
            kind = op.type
            if kind == INT:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_int
            elif kind == REF:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_ref
            elif kind == FLOAT:
                value = self.cpu.done_with_this_frame_descr_float
            else:
                raise AssertionError(kind)

        gcref = cast_instance_to_gcref(value)
        if gcref:
            rgc._make_sure_does_not_move(gcref)    # but should be prebuilt
        value = rffi.cast(lltype.Signed, gcref)
        je_location = self._call_assembler_check_descr(value, tmploc)
        #
        # Path A: use assembler_helper_adr
        assert jd is not None
        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)

        self._call_assembler_emit_helper_call(self.imm(asm_helper_adr),
                                                [tmploc, vloc], result_loc)

        jmp_location = self._call_assembler_patch_je(result_loc, je_location)

        # Path B: fast path.  Must load the return value
        #
        self._call_assembler_load_result(op, result_loc)
        #
        # Here we join Path A and Path B again
        self._call_assembler_patch_jmp(jmp_location)

    def get_loop_run_counters(self, index):
        """Return the counter struct stored at 'index'."""
        return self.loop_run_counters[index]

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        """When debugging or the jitlog is enabled, return a copy of
        'operations' with a counter increment inserted at the start and
        after every LABEL; otherwise return 'operations' unchanged.
        """
        if self._debug or jl.jitlog_enabled():
            newoperations = []
            self._append_debugging_code(newoperations, tp, number, None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    def _append_debugging_code(self, operations, tp, number, token):
        """Register a new counter and append to 'operations' an
        INCREMENT_DEBUG_COUNTER operation on its address.
        """
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        operations.append(ResOperation(rop.INCREMENT_DEBUG_COUNTER, [c_adr]))

    def _register_counter(self, tp, number, token):
        """Allocate a raw DEBUG_COUNTER, append it to
        self.loop_run_counters and return it.

        For tp 'b' or 'e' the counter is numbered with 'number';
        otherwise 'token' is required and its unique id is used.
        """
        # XXX the numbers here are ALMOST unique, but not quite, use a counter
        #     or something
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process
        self.loop_run_counters.append(struct)
        return struct

    def finish_once(self):
        """When debugging is enabled, print every registered counter
        inside a 'jit-backend-counts' debug section; then, in all cases,
        call flush_trace_counters().
        """
        if self._debug:
            # TODO remove the old logging system when jitlog is complete
            debug_start('jit-backend-counts')
            length = len(self.loop_run_counters)
            for i in range(length):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                else:
                    num = struct.number
                    if num == -1:
                        num = '-1'
                    else:
                        num = str(r_uint(num))
                    if struct.type == 'b':
                        prefix = 'bridge %s' % num
                    else:
                        prefix = 'entry %s' % num
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')

        self.flush_trace_counters()

    def flush_trace_counters(self):
        """Pass every counter with a non-zero value to
        jl._log_jit_counter() and reset all counters to zero.
        """
        # this is always called, the jitlog knows if it is enabled
        length = len(self.loop_run_counters)
        for i in range(length):
            struct = self.loop_run_counters[i]
            # only log if it has been executed
            if struct.i > 0:
                jl._log_jit_counter(struct)
            # reset the counter, flush in a later point in time will
            # add up the counters!
            struct.i = 0
        # here would be the point to free some counters
        # see YYY comment above! but first we should run this every once in a while
        # not just when jitlog_disable is called

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css, old_rpy_fastgil):
        # Before doing an external call, 'rpy_fastgil' is initialized to
        # be equal to css.  This function is called if we find out after
        # the call that it is no longer equal to css.  See description
        # in translator/c/src/thread_pthread.c.

        # XXX some duplicated logic here, but note that rgil.acquire()
        # does more than just RPyGilAcquire()
        if old_rpy_fastgil == 0:
            # this case occurs if some other thread stole the GIL but
            # released it again.  What occurred here is that we changed
            # 'rpy_fastgil' from 0 to 1, thus successfully reacquiring the
            # GIL.
            pass

        elif old_rpy_fastgil == 1:
            # 'rpy_fastgil' was (and still is) locked by someone else.
            # We need to wait for the regular mutex.
            from rpython.rlib import rgil
            rgil.acquire()
        else:
            # stole the GIL from a different thread that is also
            # currently in an external call from the jit.  Attach
            # the 'old_rpy_fastgil' into the chained list.
            from rpython.memory.gctransform import asmgcroot
            oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil)
            next = asmgcroot.gcrootanchor.next
            oth.next = next
            oth.prev = asmgcroot.gcrootanchor
            asmgcroot.gcrootanchor.next = oth
            next.prev = oth

        # similar to trackgcroot.py:pypy_asm_stackwalk, second part:
        # detach the 'css' from the chained list
        from rpython.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode,
        # 'rpy_fastgil' contains only zero or non-zero, and this is only
        # called when the old value stored in 'rpy_fastgil' was non-zero
        # (i.e. still locked, must wait with the regular mutex)
        from rpython.rlib import rgil
        rgil.acquire()

    # low-level signatures of the two helpers above
    _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed],
                                                 lltype.Void))

    def _build_release_gil(self, gcrootmap):
        """Store in self.reacqgil_addr the address of the GIL reacquire
        helper: the shadowstack one if 'gcrootmap' is None or is a shadow
        stack, else the asmgcc one.
        """
        if gcrootmap is None or gcrootmap.is_shadow_stack:
            reacqgil_func = llhelper(self._REACQGIL0_FUNC,
                                     self._reacquire_gil_shadowstack)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
        else:
            reacqgil_func = llhelper(self._REACQGIL2_FUNC,
                                     self._reacquire_gil_asmgcc)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)

    def _is_asmgcc(self):
        """True if there is a gcrootmap and it is not a shadow stack."""
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        return bool(gcrootmap) and not gcrootmap.is_shadow_stack
class BaseAssembler(object):
    """ Base class for Assembler generator in real backends

    Concrete backends are expected to provide the hooks used below
    (the _build_*() methods, build_frame_realloc_slowpath(),
    the _call_assembler_*() methods, _store_force_index(), imm(),
    new_stack_loc(), the 'mc' attribute, ...); none of them is defined
    in this class.
    """

    def __init__(self, cpu, translate_support_code=False):
        """Remember 'cpu' and reset the state filled in by setup_once()."""
        self.cpu = cpu
        self.memcpy_addr = 0
        self.memset_addr = 0
        self.rtyper = cpu.rtyper
        self._debug = False
        # filled by _register_counter() and read by finish_once(); it is
        # created here so that both work even if the subclass does not
        # create the list itself
        self.loop_run_counters = []

    def setup_once(self):
        """One-time initialization: read the GC parameters and build all
        the shared slow paths through the backend's _build_*() hooks.
        """
        # the address of the function called by 'new'
        gc_ll_descr = self.cpu.gc_ll_descr
        gc_ll_descr.initialize()
        if hasattr(gc_ll_descr, 'minimal_size_in_nursery'):
            self.gc_minimal_size_in_nursery = gc_ll_descr.minimal_size_in_nursery
        else:
            self.gc_minimal_size_in_nursery = 0
        if hasattr(gc_ll_descr, 'gcheaderbuilder'):
            self.gc_size_of_header = gc_ll_descr.gcheaderbuilder.size_gc_header
        else:
            self.gc_size_of_header = WORD # for tests
        self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn)
        self.memset_addr = self.cpu.cast_ptr_to_int(memset_fn)
        self._build_failure_recovery(False, withfloats=False)
        self._build_failure_recovery(True, withfloats=False)
        self._build_wb_slowpath(False)
        self._build_wb_slowpath(True)
        self._build_wb_slowpath(False, for_frame=True)
        # only one of those
        self.build_frame_realloc_slowpath()
        if self.cpu.supports_floats:
            self._build_failure_recovery(False, withfloats=True)
            self._build_failure_recovery(True, withfloats=True)
            self._build_wb_slowpath(False, withfloats=True)
            self._build_wb_slowpath(True, withfloats=True)
        self._build_propagate_exception_path()
        if gc_ll_descr.get_malloc_slowpath_addr is not None:
            # generate few slowpaths for various cases
            self.malloc_slowpath = self._build_malloc_slowpath(kind='fixed')
            self.malloc_slowpath_varsize = self._build_malloc_slowpath(
                kind='var')
        if hasattr(gc_ll_descr, 'malloc_str'):
            self.malloc_slowpath_str = self._build_malloc_slowpath(kind='str')
        else:
            self.malloc_slowpath_str = None
        if hasattr(gc_ll_descr, 'malloc_unicode'):
            self.malloc_slowpath_unicode = self._build_malloc_slowpath(
                kind='unicode')
        else:
            self.malloc_slowpath_unicode = None
        # entries [2] and [3] are the variants built with a True first
        # argument; like the other float-only slow paths above they are
        # only built when the cpu supports floats, and stay 0 otherwise
        lst = [0, 0, 0, 0]
        lst[0] = self._build_cond_call_slowpath(False, False)
        lst[1] = self._build_cond_call_slowpath(False, True)
        if self.cpu.supports_floats:
            lst[2] = self._build_cond_call_slowpath(True, False)
            lst[3] = self._build_cond_call_slowpath(True, True)
        self.cond_call_slowpath = lst

        self._build_stack_check_slowpath()
        self._build_release_gil(gc_ll_descr.gcrootmap)
        if not self._debug:
            # if self._debug is already set it means that someone called
            # set_debug by hand before initializing the assembler. Leave it
            # as it is
            self.set_debug(have_debug_prints_for('jit-backend-counts'))
        # when finishing, we only have one value at [0], the rest dies
        self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1,
                                              flavor='raw',
                                              track_allocation=False)
        self.gcmap_for_finish[0] = r_uint(1)

    def setup(self, looptoken):
        """Per-compilation setup: fresh codemap builder (if the cpu has
        codemaps) and a null '_finish_gcmap'.  'looptoken' is unused here.
        """
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder = CodemapBuilder()
        self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)

    def set_debug(self, v):
        """Set the debug flag to 'v' and return its previous value."""
        r = self._debug
        self._debug = v
        return r

    def rebuild_faillocs_from_descr(self, descr, inputargs):
        """Decode 'descr.rd_locs' (as written by store_info_on_descr())
        back into a list of location objects.

        Entries equal to 0xFFFF are skipped.  Smaller values index
        cpu.gen_regs, then cpu.float_regs; anything above is a frame
        position, typed after the matching entry of 'inputargs'.
        """
        locs = []
        GPR_REGS = len(self.cpu.gen_regs)
        XMM_REGS = len(self.cpu.float_regs)
        input_i = 0
        # a float register takes two position numbers when not on 64-bit
        if self.cpu.IS_64_BIT:
            coeff = 1
        else:
            coeff = 2
        for pos in descr.rd_locs:
            pos = rffi.cast(lltype.Signed, pos)
            if pos == 0xFFFF:
                continue
            elif pos < GPR_REGS:
                locs.append(self.cpu.gen_regs[pos])
            elif pos < GPR_REGS + XMM_REGS * coeff:
                pos = (pos - GPR_REGS) // coeff
                locs.append(self.cpu.float_regs[pos])
            else:
                i = pos - self.cpu.JITFRAME_FIXED_SIZE
                assert i >= 0
                tp = inputargs[input_i].type
                locs.append(self.new_stack_loc(i, tp))
            # 'inputargs' only has entries for the non-skipped positions
            input_i += 1
        return locs

    def store_info_on_descr(self, startspos, guardtok):
        """Encode 'guardtok.fail_locs' as a list of USHORT positions and
        store it on 'guardtok.faildescr.rd_locs'.

        Returns (fail_descr, target): the faildescr cast to a Signed
        integer, and the entry of self.failure_recovery_code selected by
        'exc + 2 * withfloats'.  'startspos' is unused here.
        """
        withfloats = False
        for box in guardtok.failargs:
            if box is not None and box.type == FLOAT:
                withfloats = True
                break
        exc = guardtok.exc
        target = self.failure_recovery_code[exc + 2 * withfloats]
        fail_descr = cast_instance_to_gcref(guardtok.faildescr)
        fail_descr = rffi.cast(lltype.Signed, fail_descr)
        base_ofs = self.cpu.get_baseofs_of_frame_field()
        positions = [rffi.cast(rffi.USHORT, 0)] * len(guardtok.fail_locs)
        for i, loc in enumerate(guardtok.fail_locs):
            if loc is None:
                position = 0xFFFF
            elif loc.is_stack():
                assert (loc.value & (WORD - 1)) == 0, \
                    "store_info_on_descr: misaligned"
                position = (loc.value - base_ofs) // WORD
                assert 0 < position < 0xFFFF, "store_info_on_descr: overflow!"
            else:
                assert loc is not self.cpu.frame_reg # for now
                if self.cpu.IS_64_BIT:
                    coeff = 1
                else:
                    coeff = 2
                if loc.is_float():
                    position = len(self.cpu.gen_regs) + loc.value * coeff
                else:
                    position = self.cpu.all_reg_indexes[loc.value]
            positions[i] = rffi.cast(rffi.USHORT, position)
        # write down the positions of locs
        guardtok.faildescr.rd_locs = positions
        return fail_descr, target

    def enter_portal_frame(self, op):
        """Record the two int arguments of 'op' and the current relative
        code position in the codemap builder (only if the cpu has
        codemaps).
        """
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.enter_portal_frame(op.getarg(0).getint(),
                                                    op.getarg(1).getint(),
                                                    self.mc.get_relative_pos())

    def leave_portal_frame(self, op):
        """Counterpart of enter_portal_frame(): record the first int
        argument of 'op' and the current relative code position.
        """
        if self.cpu.HAS_CODEMAP:
            self.codemap_builder.leave_portal_frame(op.getarg(0).getint(),
                                                    self.mc.get_relative_pos())

    def call_assembler(self, op, guard_op, argloc, vloc, result_loc, tmploc):
        """Emit a call to the assembler of op's JitCellToken descr.

        'guard_op' is first handed to _store_force_index().  After the
        call, the value left in 'tmploc' is checked against the
        done_with_this_frame descr matching op.result: Path A calls the
        jitdriver's assembler helper with [tmploc, vloc], Path B loads
        the result directly into 'result_loc'.  The guard itself is not
        emitted here (see the XXX at the end).
        """
        self._store_force_index(guard_op)
        descr = op.getdescr()
        assert isinstance(descr, JitCellToken)
        #
        # Write a call to the target assembler
        # we need to allocate the frame, keep in sync with runner's
        # execute_token
        jd = descr.outermost_jitdriver_sd
        self._call_assembler_emit_call(self.imm(descr._ll_function_addr),
                                        argloc, tmploc)

        if op.result is None:
            assert result_loc is None
            value = self.cpu.done_with_this_frame_descr_void
        else:
            kind = op.result.type
            if kind == INT:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_int
            elif kind == REF:
                assert result_loc is tmploc
                value = self.cpu.done_with_this_frame_descr_ref
            elif kind == FLOAT:
                value = self.cpu.done_with_this_frame_descr_float
            else:
                raise AssertionError(kind)

        gcref = cast_instance_to_gcref(value)
        if gcref:
            rgc._make_sure_does_not_move(gcref)
        value = rffi.cast(lltype.Signed, gcref)
        je_location = self._call_assembler_check_descr(value, tmploc)
        #
        # Path A: use assembler_helper_adr
        assert jd is not None
        asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr)

        self._call_assembler_emit_helper_call(self.imm(asm_helper_adr),
                                                [tmploc, vloc], result_loc)

        jmp_location = self._call_assembler_patch_je(result_loc, je_location)

        # Path B: fast path.  Must load the return value
        #
        self._call_assembler_load_result(op, result_loc)
        #
        # Here we join Path A and Path B again
        self._call_assembler_patch_jmp(jmp_location)
        # XXX here should be emitted guard_not_forced, but due
        #     to incompatibilities in how it's done, we leave it for the
        #     caller to deal with

    @specialize.argtype(1)
    def _inject_debugging_code(self, looptoken, operations, tp, number):
        """When debugging is enabled, return a copy of 'operations' with
        a counter increment inserted at the start and after every LABEL;
        otherwise return 'operations' unchanged.
        """
        if self._debug:
            newoperations = []
            self._append_debugging_code(newoperations, tp, number,
                                        None)
            for op in operations:
                newoperations.append(op)
                if op.getopnum() == rop.LABEL:
                    self._append_debugging_code(newoperations, 'l', number,
                                                op.getdescr())
            operations = newoperations
        return operations

    def _append_debugging_code(self, operations, tp, number, token):
        """Register a new counter and append to 'operations' an
        INCREMENT_DEBUG_COUNTER operation on its address.
        """
        counter = self._register_counter(tp, number, token)
        c_adr = ConstInt(rffi.cast(lltype.Signed, counter))
        operations.append(
            ResOperation(rop.INCREMENT_DEBUG_COUNTER, [c_adr], None))

    def _register_counter(self, tp, number, token):
        """Allocate a raw DEBUG_COUNTER, append it to
        self.loop_run_counters and return it.

        For tp 'b' or 'e' the counter is numbered with 'number';
        otherwise 'token' is required and its unique id is used.
        """
        # YYY very minor leak -- we need the counters to stay alive
        # forever, just because we want to report them at the end
        # of the process

        # XXX the numbers here are ALMOST unique, but not quite, use a counter
        #     or something
        struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                               track_allocation=False)
        struct.i = 0
        struct.type = tp
        if tp == 'b' or tp == 'e':
            struct.number = number
        else:
            assert token
            struct.number = compute_unique_id(token)
        self.loop_run_counters.append(struct)
        return struct

    def finish_once(self):
        """When debugging is enabled, print every registered counter
        inside a 'jit-backend-counts' debug section.
        """
        if self._debug:
            debug_start('jit-backend-counts')
            for i in range(len(self.loop_run_counters)):
                struct = self.loop_run_counters[i]
                if struct.type == 'l':
                    prefix = 'TargetToken(%d)' % struct.number
                else:
                    num = struct.number
                    if num == -1:
                        num = '-1'
                    else:
                        num = str(r_uint(num))
                    if struct.type == 'b':
                        prefix = 'bridge %s' % num
                    else:
                        prefix = 'entry %s' % num
                debug_print(prefix + ':' + str(struct.i))
            debug_stop('jit-backend-counts')

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_asmgcc(css, old_rpy_fastgil):
        # Before doing an external call, 'rpy_fastgil' is initialized to
        # be equal to css.  This function is called if we find out after
        # the call that it is no longer equal to css.  See description
        # in translator/c/src/thread_pthread.c.

        if old_rpy_fastgil == 0:
            # this case occurs if some other thread stole the GIL but
            # released it again.  What occurred here is that we changed
            # 'rpy_fastgil' from 0 to 1, thus successfully reacquiring the
            # GIL.
            pass

        elif old_rpy_fastgil == 1:
            # 'rpy_fastgil' was (and still is) locked by someone else.
            # We need to wait for the regular mutex.
            after = rffi.aroundstate.after
            if after:
                after()

        else:
            # stole the GIL from a different thread that is also
            # currently in an external call from the jit.  Attach
            # the 'old_rpy_fastgil' into the chained list.
            from rpython.memory.gctransform import asmgcroot
            oth = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, old_rpy_fastgil)
            next = asmgcroot.gcrootanchor.next
            oth.next = next
            oth.prev = asmgcroot.gcrootanchor
            asmgcroot.gcrootanchor.next = oth
            next.prev = oth

        # similar to trackgcroot.py:pypy_asm_stackwalk, second part:
        # detach the 'css' from the chained list
        from rpython.memory.gctransform import asmgcroot
        old = rffi.cast(asmgcroot.ASM_FRAMEDATA_HEAD_PTR, css)
        prev = old.prev
        next = old.next
        prev.next = next
        next.prev = prev

    @staticmethod
    @rgc.no_collect
    def _reacquire_gil_shadowstack():
        # Simplified version of _reacquire_gil_asmgcc(): in shadowstack mode,
        # 'rpy_fastgil' contains only zero or non-zero, and this is only
        # called when the old value stored in 'rpy_fastgil' was non-zero
        # (i.e. still locked, must wait with the regular mutex)
        after = rffi.aroundstate.after
        if after:
            after()

    # low-level signatures of the two helpers above
    _REACQGIL0_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void))
    _REACQGIL2_FUNC = lltype.Ptr(lltype.FuncType([rffi.CCHARP, lltype.Signed],
                                                 lltype.Void))

    def _build_release_gil(self, gcrootmap):
        """Store in self.reacqgil_addr the address of the GIL reacquire
        helper: the shadowstack one if 'gcrootmap' is None or is a shadow
        stack, else the asmgcc one.
        """
        if gcrootmap is None or gcrootmap.is_shadow_stack:
            reacqgil_func = llhelper(self._REACQGIL0_FUNC,
                                     self._reacquire_gil_shadowstack)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)
        else:
            reacqgil_func = llhelper(self._REACQGIL2_FUNC,
                                     self._reacquire_gil_asmgcc)
            self.reacqgil_addr = self.cpu.cast_ptr_to_int(reacqgil_func)

    def _is_asmgcc(self):
        """True if there is a gcrootmap and it is not a shadow stack."""
        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
        return bool(gcrootmap) and not gcrootmap.is_shadow_stack