def emit_vec_pack_i(self, op, arglocs, regalloc):
    """Pack ``count`` integer elements of ``sourceloc`` into ``resloc``.

    ``arglocs`` unpacks to (resloc, vecloc, sourceloc, residxloc,
    srcidxloc, countloc, sizeloc); the last four carry immediate
    ``.value`` fields.  ``size`` is the source element size and
    ``op.bytesize`` the element size of the packed result.

    Fix: the original body asserted ``isinstance(op, VectorOp)`` twice;
    the redundant second assert is removed.
    """
    assert isinstance(op, VectorOp)
    resloc, vecloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc = arglocs
    residx = residxloc.value
    srcidx = srcidxloc.value
    count = countloc.value
    size = sizeloc.value
    newsize = op.bytesize
    if count == 1:
        if resloc.is_core_reg():
            # extract a single element into a general purpose register
            assert sourceloc.is_vector_reg()
            index = l.addr(srcidx)
            self.mc.VLGV(resloc, sourceloc, index, l.itemsize_to_mask(size))
        else:
            # insert one core register value into the vector result;
            # start from vecloc so the untouched lanes are preserved
            assert sourceloc.is_core_reg()
            assert resloc.is_vector_reg()
            index = l.addr(residx)
            self.mc.VLR(resloc, vecloc)
            self.mc.VLVG(resloc, sourceloc, index, l.itemsize_to_mask(newsize))
    else:
        assert resloc.is_vector_reg()
        assert sourceloc.is_vector_reg()
        self.mc.VLR(resloc, vecloc)
        for j in range(count):
            sindex = l.addr(j + srcidx)
            # load from sourceloc into GP reg and store back into resloc
            self.mc.VLGV(r.SCRATCH, sourceloc, sindex, l.itemsize_to_mask(size))
            rindex = l.addr(j + residx)
            self.mc.VLVG(resloc, r.SCRATCH, rindex, l.itemsize_to_mask(newsize))
def emit_vec_expand_f(self, op, arglocs, regalloc):
    """Broadcast a scalar float source into every lane of ``resloc``."""
    assert isinstance(op, VectorOp)
    resloc, srcloc = arglocs
    mask = l.itemsize_to_mask(op.bytesize)
    if not srcloc.is_fp_reg():
        # source not in an fp register: load-and-replicate in one shot
        self.mc.VLREP(resloc, srcloc, mask)
    else:
        # source already lives in an fp/vector register: replicate lane 0
        self.mc.VREP(resloc, srcloc, l.imm0, mask)
def emit_vec_int_signext(self, op, arglocs, regalloc): resloc, loc0, osizeloc, nsizeloc = arglocs # signext is only allowed if the data type sizes do not change. # e.g. [byte,byte] = sign_ext([byte, byte]), a simple move is sufficient! osize = osizeloc.value nsize = nsizeloc.value if osize == nsize: self.regalloc_mov(loc0, resloc) elif (osize == 4 and nsize == 8) or (osize == 8 and nsize == 4): self.mc.VLGV(r.SCRATCH, loc0, l.addr(0), l.itemsize_to_mask(osize)) self.mc.VLVG(resloc, r.SCRATCH, l.addr(0), l.itemsize_to_mask(nsize)) self.mc.VLGV(r.SCRATCH, loc0, l.addr(1), l.itemsize_to_mask(osize)) self.mc.VLVG(resloc, r.SCRATCH, l.addr(1), l.itemsize_to_mask(nsize)) if nsize == 8: self.mc.VSEG(resloc, resloc, l.itemsize_to_mask(osize))
def emit_vec_int_ne(self, op, arglocs, regalloc):
    """Element-wise integer inequality: compare-equal, then bitwise NOT."""
    assert isinstance(op, VectorOp)
    resloc, lhs, rhs, sizeloc = arglocs
    mask = l.itemsize_to_mask(sizeloc.value)
    # VCEQ fills each lane with all-ones where lhs == rhs ...
    self.mc.VCEQ(resloc, lhs, rhs, mask, l.imm(1))
    # ... and VNO (nor with itself) inverts that into lhs != rhs
    self.mc.VNO(resloc, resloc, resloc)
    flush_vec_cc(self, regalloc, c.VNEI, op.bytesize, resloc)
def emit_vec_int_is_true(self, op, arglocs, regalloc):
    """Per lane: all-ones iff the source element is non-zero."""
    assert isinstance(op, VectorOp)
    resloc, argloc, sizeloc = arglocs
    mask = l.itemsize_to_mask(sizeloc.value)
    zero = regalloc.vrm.get_scratch_reg()
    # xor a vector register with itself -> all-zero comparison operand
    self.mc.VX(zero, zero, zero)
    # unsigned compare-high against zero: arg > 0 unsigned <=> arg != 0
    self.mc.VCHL(resloc, argloc, zero, mask, l.imm(0b0001))
    flush_vec_cc(self, regalloc, c.VEQI, op.bytesize, resloc)
def _accum_reduce(self, op, arg, accumloc, targetloc):
    """Reduce the two 64-bit lanes of ``accumloc`` into ``targetloc``.

    ``op`` is the accumulation operator ('+' or '*'); ``arg.type``
    selects the float or int instruction sequence.  Falls through to
    ``not_implemented`` for any unsupported operator.
    """
    # Currently the accumulator can ONLY be 64 bit float/int
    if arg.type == FLOAT:
        # swap the two double lanes so both orderings are available
        self.mc.VPDI(targetloc, accumloc, accumloc, permi(1,0))
        if op == '+':
            # vector fp add of the swapped copy with the original
            self.mc.VFA(targetloc, targetloc, accumloc, l.imm3, l.imm(0b1000), l.imm(0))
            return
        elif op == '*':
            # vector fp multiply of the swapped copy with the original
            self.mc.VFM(targetloc, targetloc, accumloc, l.imm3, l.imm(0b1000), l.imm(0))
            return
    else:
        assert arg.type == INT
        # extract both 64-bit lanes into general purpose registers
        self.mc.VLGV(r.SCRATCH, accumloc, l.addr(0), l.itemsize_to_mask(8))
        self.mc.VLGV(targetloc, accumloc, l.addr(1), l.itemsize_to_mask(8))
        if op == '+':
            # 64-bit GP add: targetloc += scratch
            self.mc.AGR(targetloc, r.SCRATCH)
            return
        elif op == '*':
            # 64-bit GP multiply: targetloc *= scratch
            self.mc.MSGR(targetloc, r.SCRATCH)
            return
    not_implemented("reduce sum for %s not impl." % arg)
# NOTE(review): duplicate definition of _accum_reduce — apart from
# whitespace it is identical to the definition directly above; in a class
# body the later definition silently shadows the earlier one.  Consider
# deleting one of the two.
def _accum_reduce(self, op, arg, accumloc, targetloc):
    """Reduce the two 64-bit lanes of ``accumloc`` into ``targetloc``.

    ``op`` is the accumulation operator ('+' or '*'); ``arg.type``
    selects the float or int instruction sequence.  Falls through to
    ``not_implemented`` for any unsupported operator.
    """
    # Currently the accumulator can ONLY be 64 bit float/int
    if arg.type == FLOAT:
        # swap the two double lanes so both orderings are available
        self.mc.VPDI(targetloc, accumloc, accumloc, permi(1, 0))
        if op == '+':
            # vector fp add of the swapped copy with the original
            self.mc.VFA(targetloc, targetloc, accumloc, l.imm3, l.imm(0b1000), l.imm(0))
            return
        elif op == '*':
            # vector fp multiply of the swapped copy with the original
            self.mc.VFM(targetloc, targetloc, accumloc, l.imm3, l.imm(0b1000), l.imm(0))
            return
    else:
        assert arg.type == INT
        # extract both 64-bit lanes into general purpose registers
        self.mc.VLGV(r.SCRATCH, accumloc, l.addr(0), l.itemsize_to_mask(8))
        self.mc.VLGV(targetloc, accumloc, l.addr(1), l.itemsize_to_mask(8))
        if op == '+':
            # 64-bit GP add: targetloc += scratch
            self.mc.AGR(targetloc, r.SCRATCH)
            return
        elif op == '*':
            # 64-bit GP multiply: targetloc *= scratch
            self.mc.MSGR(targetloc, r.SCRATCH)
            return
    not_implemented("reduce sum for %s not impl." % arg)
def flush_vec_cc(asm, regalloc, condition, size, resultloc):
    """Materialize a boolean vector result left in the condition code.

    After emitting an instruction that leaves a boolean result in a
    condition code (cc), call this.  In the common case resultloc is SPP
    (chosen by the regalloc), meaning "propagate the result to the next
    guard by keeping it in the cc".  Otherwise resultloc is a real
    register and we turn each lane into 0 or 1 via a vector select.
    """
    if resultloc is r.SPP:
        asm.guard_success_cc = condition
        return
    # build constant vectors of all-ones-per-lane (1) and zeros, then
    # select per lane based on the mask already sitting in resultloc
    vones = regalloc.vrm.get_scratch_reg()
    vzeros = regalloc.vrm.get_scratch_reg()
    asm.mc.VX(vzeros, vzeros, vzeros)
    asm.mc.VREPI(vones, l.imm(1), l.itemsize_to_mask(size))
    asm.mc.VSEL(resultloc, vones, vzeros, resultloc)
def emit_vec_int_sub(self, op, arglocs, regalloc):
    """Element-wise integer subtraction: resloc = loc0 - loc1."""
    resloc, minuend, subtrahend, size_loc = arglocs
    elem_mask = l.itemsize_to_mask(size_loc.value)
    self.mc.VS(resloc, minuend, subtrahend, elem_mask)