예제 #1
0
 def _call_assembler_patch_je(self, result_loc, jmp_location):
     """Patch the branch at 'jmp_location' and open a new placeholder.

     A BRK is emitted at the current position as a placeholder (presumably
     overwritten later by the caller with the real jump -- confirm), and
     its position is returned.  The word at 'jmp_location' is then
     overwritten with a branch on condition EQ whose offset reaches the
     position right after that placeholder.

     'result_loc' is not used by this implementation.
     """
     mc = self.mc
     placeholder_pos = mc.currpos()
     mc.BRK()
     # Overwrite the earlier slot in place; the target is the current
     # position, i.e. just past the placeholder emitted above.
     patcher = OverwritingBuilder(mc, jmp_location, WORD)
     distance = mc.currpos() - jmp_location
     patcher.B_ofs_cond(distance, c.EQ)
     return placeholder_pos
예제 #2
0
    def _emit_op_cond_call(self, op, arglocs, fcond):
        """Emit the code for a conditional call (cond_call / cond_call_value).

        arglocs[0] is the location compared against zero, or None when the
        condition flags already hold the result.  When arglocs has exactly
        two entries, arglocs[1] is the result location of a
        cond_call_value.  A placeholder is emitted in front of the call
        sequence and finally overwritten with a branch on 'fcond' that
        skips the whole sequence; its position and condition are recorded
        in self.previous_cond_call_jcond.
        """
        # cond_call_value carries a result location, plain cond_call does not.
        # see x86.regalloc for why we skip res_loc in the gcmap
        res_loc = arglocs[1] if len(arglocs) == 2 else None

        cond_loc = arglocs[0]
        if cond_loc is not None:  # None: the result is already in CC
            self.mc.CMP_ri(cond_loc.value, 0)

        gcmap = self._regalloc.get_gcmap([res_loc])

        # Reserve the slot of the conditional jump; it is patched at the
        # end, once the length of the call sequence is known.
        branch_pos = self.mc.currpos()
        self.mc.BRK()

        self.push_gcmap(self.mc, gcmap)
        func_addr = rffi.cast(lltype.Signed, op.getarg(1).getint())
        self.mc.gen_load_int(r.ip1.value, func_addr)

        # Select the slowpath variant.  'callee_only' ends up True only if
        # every bound core register is in save_around_call_regs; 'floats'
        # is True if any float register is bound.
        callee_only = False
        floats = False
        regalloc = self._regalloc
        if regalloc is not None:
            core_regs = regalloc.rm
            callee_only = True
            for reg in core_regs.reg_bindings.values():
                if reg not in core_regs.save_around_call_regs:
                    callee_only = False
                    break
            if regalloc.vfprm.reg_bindings:
                floats = True
        slowpath_index = floats * 2 + callee_only
        cond_call_adr = self.cond_call_slowpath[slowpath_index]
        assert cond_call_adr

        self.mc.BL(cond_call_adr)
        # For a COND_CALL_VALUE the result has to be moved from ip1 into
        # its final location.
        if res_loc is not None:
            self.mc.MOV_rr(res_loc.value, r.ip1.value)

        self.pop_gcmap(self.mc)

        # Now the end of the sequence is known: patch the reserved slot.
        patcher = OverwritingBuilder(self.mc, branch_pos, WORD)
        patcher.B_ofs_cond(self.mc.currpos() - branch_pos, fcond)
        # The jump might be overridden again so that it also skips over
        # the following guard_no_exception.
        self.previous_cond_call_jcond = branch_pos, fcond
예제 #3
0
    def move_real_result_and_call_reacqgil_addr(self, fastgil):
        """Emit the code that reacquires the GIL after a call_release_gil.

        A fast path first tries to take the lock inline with a
        compare-and-swap on the word at 'fastgil'.  If that fails -- or,
        with a shadowstack, if the shadowstack pointer differs from the
        value saved in x20 -- the emitted code calls self.asm.reacqgil_addr,
        saving the call result held in self.resloc (if any) around it.

        'fastgil' is the address loaded into ip2 and used as &rpy_fastgil.
        """
        # try to reacquire the lock.  The following two values are saved
        # across the call and are still alive now:
        RTHREADID = r.x19      # our thread ident
        RSHADOWOLD = r.x20     # old value of the shadowstack pointer

        RPYFASTGIL = r.ip2     # &rpy_fastgil, loaded now:
        self.mc.gen_load_int(RPYFASTGIL.value, fastgil)

        # this comes from gcc compiling this code:
        #    __sync_bool_compare_and_swap(&rpy_fastgil, old=0, new=RTHREADID);
        # NOTE(review): the +12/-12 offsets look like byte offsets over
        # 4-byte instructions: +12 would land on the DMB (lock was not
        # free), -12 would go back to the LDXR (store failed) -- confirm.
        self.mc.LDXR(r.x1.value, RPYFASTGIL.value)
        self.mc.CBNZ(r.x1.value, +12)
        self.mc.STLXR(r.x3.value, RTHREADID.value, RPYFASTGIL.value)
        self.mc.CBNZ_w(r.x3.value, -12)
        self.mc.DMB_ISH()
        # now x1 is the old value of the lock, and if x1 == 0 then the lock
        # now contains RTHREADID

        b1_location = self.mc.currpos()
        self.mc.BRK()        # boehm: patched with a CBZ (jump if x1 == 0)
                             # shadowstack: patched with CBNZ instead

        gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
        if gcrootmap:
            # When doing a call_release_gil with shadowstack, there
            # is the risk that the 'rpy_fastgil' was free but the
            # current shadowstack can be the one of a different
            # thread.  So here we check if the shadowstack pointer
            # is still the same as before we released the GIL (saved
            # in 'x20'), and if not, we fall back to 'reacqgil_addr'.
            rst = gcrootmap.get_root_stack_top_addr()
            self.mc.gen_load_int(r.ip1.value, rst)
            self.mc.LDR_ri(r.ip0.value, r.ip1.value, 0)   # new shadowstack
            self.mc.CMP_rr(r.ip0.value, RSHADOWOLD.value)
            b3_location = self.mc.currpos()
            self.mc.BRK() # B.eq forward

            # revert the rpy_fastgil acquired above, so that the
            # general 'reacqgil_addr' below can acquire it again...
            self.mc.STR_ri(r.xzr.value, RPYFASTGIL.value, 0)

            # patch the b1_location above, with "CBNZ here"
            # (if the lock was not acquired, x1 != 0, the shadowstack check
            # and the revert are skipped and we go straight to the call)
            pmc = OverwritingBuilder(self.mc, b1_location, WORD)
            pmc.CBNZ(r.x1.value, self.mc.currpos() - b1_location)

            # b1 is resolved; the jump still to be patched is b3
            open_location = b3_location
        else:
            open_location = b1_location

        # Yes, we need to call the reacqgil() function.
        # save the result we just got
        RSAVEDRES = RTHREADID     # can reuse this reg here to save things
        reg = self.resloc
        if reg is not None:
            if reg.is_core_reg():
                self.mc.MOV_rr(RSAVEDRES.value, reg.value)
            elif reg.is_vfp_reg():
                # float result: spill it to the stack.  2 * WORD is
                # reserved for a single register -- presumably to keep sp
                # aligned; TODO confirm
                self.mc.SUB_ri(r.sp.value, r.sp.value, 2 * WORD)
                self.mc.STR_di(reg.value, r.sp.value, 0)

        # call the function
        self.mc.BL(self.asm.reacqgil_addr)

        # restore the saved register
        if reg is not None:
            if reg.is_core_reg():
                self.mc.MOV_rr(reg.value, RSAVEDRES.value)
            elif reg.is_vfp_reg():
                self.mc.LDR_di(reg.value, r.sp.value, 0)
                self.mc.ADD_ri(r.sp.value, r.sp.value, 2 * WORD)

        # now patch the still-open jump above:
        #     boehm: patch b1_location with a CBZ(x1)
        #     shadowstack: patch b3_location with BEQ
        # Either way the patched jump targets the current position, i.e.
        # it skips the save/call/restore sequence on the fast path.
        pmc = OverwritingBuilder(self.mc, open_location, WORD)
        offset = self.mc.currpos() - open_location
        if gcrootmap:
            pmc.B_ofs_cond(offset, c.EQ)
        else:
            pmc.CBZ(r.x1.value, offset)

        if not we_are_translated():                    # for testing: now we can access
            self.mc.SUB_ri(r.fp.value, r.fp.value, 1)  # fp again
예제 #4
0
    def _write_barrier_fastpath(self, mc, descr, arglocs, array=False, is_frame=False):
        """Emit the inline fast path of the GC write barrier into 'mc'.

        mc       -- the code builder all instructions are emitted into
        descr    -- write barrier descriptor; supplies the jit_wb_* flag
                    masks, byte offsets and the card page shift
        arglocs  -- arglocs[0] is the location of the object written into;
                    with card marking, arglocs[1] is the index (a core
                    register) and arglocs[-1] a preallocated temporary
        array    -- if true and descr.jit_wb_cards_set != 0, the
                    card-marking variant is emitted
        is_frame -- if true, arglocs[0] must be r.fp and the helper at
                    index 4 of self.wb_slowpath is called
        """
        # Write code equivalent to write_barrier() in the GC: it checks
        # a flag in the object at arglocs[0], and if set, it calls a
        # helper piece of assembler.  The latter saves registers as needed
        # and call the function remember_young_pointer() from the GC.
        if we_are_translated():
            cls = self.cpu.gc_ll_descr.has_write_barrier_class()
            assert cls is not None and isinstance(descr, cls)
        #
        card_marking = False
        mask = descr.jit_wb_if_flag_singlebyte
        if array and descr.jit_wb_cards_set != 0:
            # assumptions the rest of the function depends on:
            assert (descr.jit_wb_cards_set_byteofs ==
                    descr.jit_wb_if_flag_byteofs)
            assert descr.jit_wb_cards_set_singlebyte == -0x80
            card_marking = True
            # test both flags at once: they live in the same byte (asserted
            # above)
            mask = descr.jit_wb_if_flag_singlebyte | -0x80
        #
        loc_base = arglocs[0]
        if is_frame:
            assert loc_base is r.fp
        # ip0 = the flag byte of the object; it is tested against the mask
        mc.LDRB_ri(r.ip0.value, loc_base.value, descr.jit_wb_if_flag_byteofs)
        # the mask may be negative (see -0x80 above): keep only the low byte
        # before loading it as an unsigned 16-bit immediate
        mask &= 0xFF
        mc.MOVZ_r_u16(r.ip1.value, mask, 0)
        mc.TST_rr_shift(r.ip0.value, r.ip1.value, 0)
        # placeholder, patched at the very end with a B.EQ to the end of
        # the emitted code: nothing to do if none of the tested flags is set
        jz_location = mc.currpos()
        mc.BRK()

        # for cond_call_gc_wb_array, also add another fast path:
        # if GCFLAG_CARDS_SET, then we can just set one bit and be done
        if card_marking:
            mc.MOVZ_r_u16(r.ip1.value, 0x80, 0)
            # GCFLAG_CARDS_SET is in this byte at 0x80
            mc.TST_rr_shift(r.ip0.value, r.ip1.value, 0)

            # placeholder, patched below with a B.NE to the card-setting code
            js_location = mc.currpos()
            mc.BRK()
        else:
            # dummy value: js_location is only read when card_marking is true
            js_location = 0

        # Write only a CALL to the helper prepared in advance, passing it as
        # argument the address of the structure we are writing into
        # (the first argument to COND_CALL_GC_WB).
        # Index into self.wb_slowpath: card_marking (0 or 1), plus 2 when
        # float registers are bound; 4 is the variant for the frame.
        helper_num = card_marking
        if is_frame:
            helper_num = 4
        elif self._regalloc is not None and self._regalloc.vfprm.reg_bindings:
            helper_num += 2
        if self.wb_slowpath[helper_num] == 0:    # tests only
            assert not we_are_translated()
            self.cpu.gc_ll_descr.write_barrier_descr = descr
            self._build_wb_slowpath(card_marking,
                                    bool(self._regalloc.vfprm.reg_bindings))
            assert self.wb_slowpath[helper_num] != 0
        #
        # the helper takes its argument in x0: save x0 and loc_base on the
        # stack around the call when the object is in another register
        if loc_base is not r.x0:
            # push two registers to keep stack aligned
            mc.SUB_ri(r.sp.value, r.sp.value, 2 * WORD)
            mc.STR_ri(r.x0.value, r.sp.value, WORD)
            mc.STR_ri(loc_base.value, r.sp.value, 0)
            mc.MOV_rr(r.x0.value, loc_base.value)
            if is_frame:
                assert loc_base is r.fp
        mc.BL(self.wb_slowpath[helper_num])
        if loc_base is not r.x0:
            mc.LDR_ri(r.x0.value, r.sp.value, WORD)
            mc.LDR_ri(loc_base.value, r.sp.value, 0)
            mc.ADD_ri(r.sp.value, r.sp.value, 2 * WORD)

        if card_marking:
            # The helper ends again with a check of the flag in the object.  So
            # here, we can simply write again a conditional jump, which will be
            # taken if GCFLAG_CARDS_SET is still not set.
            jns_location = mc.currpos()
            mc.BRK()
            #
            # patch the JS above
            offset = mc.currpos() - js_location
            pmc = OverwritingBuilder(mc, js_location, WORD)
            pmc.B_ofs_cond(offset, c.NE)  # We want to jump if the z flag isn't set
            #
            # case GCFLAG_CARDS_SET: emit a few instructions to do
            # directly the card flag setting
            loc_index = arglocs[1]
            assert loc_index.is_core_reg()
            tmp1 = r.ip1
            tmp2 = arglocs[-1]  # the last item is a preallocated tmp
            # lr = byteofs
            # (computed from loc_index shifted right by s with MVN --
            # presumably ~(index >> s), a negative offset from the object;
            # TODO confirm)
            s = 3 + descr.jit_wb_card_page_shift
            mc.MVN_rr_shifted(r.lr.value, loc_index.value, s, shifttype=shift.LSR)

            # tmp1 = byte_index
            # i.e. 7 & (index >> card_page_shift): the bit number in the byte
            mc.MOVZ_r_u16(r.ip0.value, 7, 0)
            mc.AND_rr_shift(tmp1.value, r.ip0.value, loc_index.value,
                            descr.jit_wb_card_page_shift, shifttype=shift.LSR)

            # set the bit
            # (load the byte at loc_base + lr, OR in 1 << tmp1, store it back)
            mc.MOVZ_r_u16(tmp2.value, 1, 0)
            mc.LDRB_rr(r.ip0.value, loc_base.value, r.lr.value)
            mc.LSL_rr(tmp2.value, tmp2.value, tmp1.value)
            mc.ORR_rr(r.ip0.value, r.ip0.value, tmp2.value)
            mc.STR_size_rr(0, r.ip0.value, loc_base.value, r.lr.value)
            # done
            #
            # patch the JNS above
            offset = mc.currpos() - jns_location
            pmc = OverwritingBuilder(mc, jns_location, WORD)
            pmc.B_ofs_cond(offset, c.EQ)  # We want to jump if the z flag is set

        # finally patch the first placeholder: jump here, past everything,
        # when the initial flag test gave zero
        offset = mc.currpos() - jz_location
        pmc = OverwritingBuilder(mc, jz_location, WORD)
        pmc.B_ofs_cond(offset, c.EQ)
예제 #5
0
 def _call_assembler_patch_jmp(self, jmp_location):
     """Overwrite the slot at 'jmp_location' with a branch to here.

     The current position of self.mc is the merge point; the word at
     'jmp_location' is overwritten with a B_ofs whose offset is the
     distance from that slot to the merge point.
     """
     mc = self.mc
     merge_pos = mc.currpos()
     patcher = OverwritingBuilder(mc, jmp_location, WORD)
     patcher.B_ofs(merge_pos - jmp_location)