def mmLoaddupPd(self, src, repList):
    '''
    Build a replacement expression for ``src`` out of the entry of
    ``repList`` that has the greatest first element.

    repList is a list of tuples whose first element is used as the sort key
    and whose second element (``dst``) must expose ``reglen``, ``mrmap`` and
    ``pointer``.  (Presumably these are ``(line, store)`` pairs - confirm
    against the caller.)

    Returns ``mmShufflePd(load, load, (i, i))`` where both loads read
    ``dst.pointer`` and ``i`` is ``src.pointer.getAt()[0]``, but only when
    ``dst.reglen == 2`` and ``dst.mrmap == [0, 1]``.  In every other case the
    method returns ``None``.
    '''
    by_key_desc = sorted(repList, key=lambda entry: entry[0], reverse=True)
    chosen = by_key_desc[0][1]

    # Only the two-element, identity-mapped layout is handled here.
    if not (chosen.reglen == 2 and chosen.mrmap == [0, 1]):
        return None

    position = src.pointer.getAt()
    first_load = mmLoaduPd(chosen.pointer)
    second_load = mmLoaduPd(chosen.pointer)
    return mmShufflePd(first_load, second_load, (position[0], position[0]))
def storeMatrix(self, mParams):
    '''
    Return the list of instructions that copy the block ``mParams['nuM']``
    into ``mParams['m']``.

    Keys read from mParams: 'nuM', 'm', 'nuML', 'nuMR', 'mL', 'mR', 'M', 'N'
    and 'compact'.

    Two shapes produce instructions:
      * M == 1 and N == 1: one load followed by an ``mmStoreSd`` to the
        address of the destination element.
      * M == 2 and N == 1 with a falsy 'compact': one load, one
        ``mmShufflePd(v, v, (1, 1))`` and two ``mmStoreSd`` instructions.

    Every other combination (including a compact 2x1) returns an empty list.
    '''
    nu_block = mParams['nuM']
    target = mParams['m']
    nu_left = mParams['nuML']
    nu_right = mParams['nuMR']
    tgt_left = mParams['mL']
    tgt_right = mParams['mR']
    rows = mParams['M']
    cols = mParams['N']
    compact = mParams['compact']

    if rows == 1 and cols == 1:
        vec = mmLoaduPd(Pointer(nu_block[nu_left.of(0), nu_right.of(0)]))
        elem_addr = AddressOf(sa(target[tgt_left.of(0), tgt_right.of(0)]))
        store = mmStoreSd(vec, elem_addr)
        return [Comment("1x2 -> 1x1"), store]

    if rows == 2 and cols == 1 and not compact:
        vec = mmLoaduPd(Pointer(nu_block[nu_left.of(0), nu_right.of(0)]))
        shuffled = mmShufflePd(vec, vec, (1, 1))
        targets = []
        for idx in range(2):
            targets.append(Pointer(target[tgt_left.of(idx), tgt_right.of(0)]))
        store_first = mmStoreSd(vec, targets[0])
        store_second = mmStoreSd(shuffled, targets[1])
        return [
            Comment("2x1 -> 2x1 - (Store) Incompact"),
            store_first,
            store_second,
        ]

    # No instructions are generated for any other shape.
    return []
def T(self, sParams, dParams, opts):
    '''
    Return the instruction list for the transpose of one nu-block (nu = 2).

    Keys read: 'nuM', 'nuML', 'nuMR' from both sParams and dParams, plus
    'nuMM' and 'nuMN' from dParams.  ``opts`` is not used.

    The list always starts with a Comment describing the operation.  When
    nuMM * nuMN == 2 a single vector is loaded from the source and stored to
    the destination unchanged.  Otherwise the source vectors at first-axis
    indices 0 and 1 are loaded, combined with mmUnpackloPd / mmUnpackhiPd and
    stored to destination indices 0 and 1.
    '''
    width = 2
    in_block = sParams['nuM']
    out_block = dParams['nuM']
    in_left = sParams['nuML']
    in_right = sParams['nuMR']
    out_left = dParams['nuML']
    out_right = dParams['nuMR']
    rows = dParams['nuMM']
    cols = dParams['nuMN']

    def load_in(idx):
        return mmLoaduPd(Pointer(in_block[in_left.of(idx), in_right.of(0)]))

    def out_ptr(idx):
        return Pointer(out_block[out_left.of(idx), out_right.of(0)])

    label = "".join(
        [str(width), "-BLAC: (", str(cols), "x", str(rows), ")^T"])
    program = [Comment(label)]

    if rows * cols == width:
        vec = load_in(0)
        program.append(mmStoreuPd(vec, out_ptr(0)))
        return program

    vec0 = load_in(0)
    vec1 = load_in(1)
    ptr0 = out_ptr(0)
    ptr1 = out_ptr(1)
    low_halves = mmUnpackloPd(vec0, vec1)
    high_halves = mmUnpackhiPd(vec0, vec1)
    program.append(mmStoreuPd(low_halves, ptr0))
    program.append(mmStoreuPd(high_halves, ptr1))
    return program
def Add(self, s0Params, s1Params, dParams, opts):
    '''
    Return the instruction list for the element-wise sum of two nu-blocks
    (nu = 2).

    Keys read: 'nuM', 'nuML', 'nuMR' from s0Params, s1Params and dParams,
    plus 'nuMM' and 'nuMN' from dParams.  ``opts`` is not used.

    The list always starts with a Comment describing the operation.  One
    load/load/mmAddPd/store group is emitted when nuMM * nuMN == 2, one group
    per index in range(nuMM) when both nuMM and nuMN equal 2, and nothing
    beyond the Comment for any other shape.
    '''
    width = 2
    lhs_block = s0Params['nuM']
    rhs_block = s1Params['nuM']
    out_block = dParams['nuM']
    lhs_left, lhs_right = s0Params['nuML'], s0Params['nuMR']
    rhs_left, rhs_right = s1Params['nuML'], s1Params['nuMR']
    out_left, out_right = dParams['nuML'], dParams['nuMR']
    rows = dParams['nuMM']
    cols = dParams['nuMN']

    label = "".join([
        str(width), "-BLAC: ", str(rows), "x", str(cols), " + ", str(rows),
        "x", str(cols)
    ])
    program = [Comment(label)]

    # Both supported shapes run the same per-index sequence; they differ only
    # in which indices are visited.
    if rows * cols == width:
        indices = range(1)
    elif rows == width and cols == width:
        indices = range(rows)
    else:
        indices = range(0)

    for idx in indices:
        lhs = mmLoaduPd(Pointer(lhs_block[lhs_left.of(idx), lhs_right.of(0)]))
        rhs = mmLoaduPd(Pointer(rhs_block[rhs_left.of(idx), rhs_right.of(0)]))
        out = Pointer(out_block[out_left.of(idx), out_right.of(0)])
        program.append(mmStoreuPd(mmAddPd(lhs, rhs), out))

    return program
def Kro(self, s0Params, s1Params, dParams, opts):
    '''
    Return the instruction list for the "Kro" operation on two nu-blocks
    (nu = 2).

    Keys read: 'nuM', 'nuML', 'nuMR' from s0Params, s1Params and dParams;
    'M', 'N', 'nuMM' and 'nuMN' from s0Params and s1Params.  ``opts`` is not
    used.

    The list always starts with a Comment describing the operation, then:
      * if the product of all four 'M'/'N' values is 1, one plain
        load/load/mmMulPd/store group;
      * else if s0Params['M'] * s0Params['N'] == 1, the first operand is
        loaded once, passed through mmShufflePd(v, v, (0, 0)) and multiplied
        with each visited vector of the second operand;
      * otherwise the roles are swapped: the second operand is loaded and
        shuffled once and multiplied with each visited vector of the first.
    In the last two cases one vector is visited when the other operand's
    nuMM * nuMN == 2, and two vectors otherwise.
    '''
    width = 2
    lhs_block = s0Params['nuM']
    rhs_block = s1Params['nuM']
    out_block = dParams['nuM']
    lhs_left, lhs_right = s0Params['nuML'], s0Params['nuMR']
    rhs_left, rhs_right = s1Params['nuML'], s1Params['nuMR']
    out_left, out_right = dParams['nuML'], dParams['nuMR']
    full_m = s0Params['M']
    full_k = s0Params['N']
    full_n = s1Params['M']
    full_p = s1Params['N']
    blk_m = s0Params['nuMM']
    blk_k = s0Params['nuMN']
    blk_n = s1Params['nuMM']
    blk_p = s1Params['nuMN']

    def load_lhs(idx):
        return mmLoaduPd(Pointer(lhs_block[lhs_left.of(idx), lhs_right.of(0)]))

    def load_rhs(idx):
        return mmLoaduPd(Pointer(rhs_block[rhs_left.of(idx), rhs_right.of(0)]))

    def out_ptr(idx):
        return Pointer(out_block[out_left.of(idx), out_right.of(0)])

    label = "".join([
        str(width), "-BLAC: ", str(blk_m), "x", str(blk_k), " Kro ",
        str(blk_n), "x", str(blk_p)
    ])
    program = [Comment(label)]

    if full_m * full_k * full_n * full_p == 1:
        target = out_ptr(0)
        lhs = load_lhs(0)
        rhs = load_rhs(0)
        program.append(mmStoreuPd(mmMulPd(lhs, rhs), target))
    elif full_m * full_k == 1:
        count = 1 if blk_n * blk_p == width else width
        single = load_lhs(0)
        dup = mmShufflePd(single, single, (0, 0))
        for idx in range(count):
            rhs = load_rhs(idx)
            target = out_ptr(idx)
            program.append(mmStoreuPd(mmMulPd(dup, rhs), target))
    else:
        count = 1 if blk_m * blk_k == width else width
        single = load_rhs(0)
        dup = mmShufflePd(single, single, (0, 0))
        for idx in range(count):
            lhs = load_lhs(idx)
            target = out_ptr(idx)
            program.append(mmStoreuPd(mmMulPd(lhs, dup), target))

    return program
def ScaLoad(self, src, repList):
    '''
    Build the expression that replaces the scalar load ``src``.

    repList is a list of tuples (line, dst); only ``repList[0][1].pointer``
    is used here.  ``self.opts['precision']`` selects the single-precision
    helpers (mmLoaduPs / mmShufflePs / mmCvtssf32) when it equals 'float' and
    the double-precision helpers (mmLoaduPd / mmShufflePd / mmCvtsdf64)
    otherwise.

    When ``src.pointer.at[1]`` is 0 the result is the conversion of a plain
    load.  Otherwise two loads of the same pointer are shuffled first, with
    ``src.pointer.at[1]`` as the last entry of the shuffle tuple.
    '''
    single_precision = self.opts['precision'] == 'float'
    position = src.pointer.at[1]
    stored_at = repList[0][1].pointer

    if single_precision:
        load, shuffle, convert = mmLoaduPs, mmShufflePs, mmCvtssf32
        selector = (0, 0, 0, position)
    else:
        load, shuffle, convert = mmLoaduPd, mmShufflePd, mmCvtsdf64
        selector = (0, position)

    if position == 0:
        return convert(load(stored_at))
    return convert(shuffle(load(stored_at), load(stored_at), selector))
def ScaLoad(self, src, repList, bounds):
    '''
    Build the expression that replaces the scalar load ``src``.

    src is the ScaLoad object we want to replace.
    repList is a list of tuples (line, dst). The dst elements of the tuples
    are store commands.  The tuple with the greatest ``line`` is the one
    whose ``dst.pointer`` is reloaded.
    bounds is accepted but not used by this method.

    ``self.opts['precision'] == 'float'`` selects the Ps/ss helpers; any
    other value selects the Pd/sd helpers.  When ``src.pointer.at[1]`` is 0
    the reloaded vector is converted directly; otherwise two reloads are
    shuffled first, with ``src.pointer.at[1]`` as the last shuffle entry.
    '''
    by_line_desc = sorted(repList, key=lambda pair: pair[0], reverse=True)
    use_float = self.opts['precision'] == 'float'
    position = src.pointer.at[1]
    stored_at = by_line_desc[0][1].pointer

    if use_float:
        if position == 0:
            return mmCvtssf32(mmLoaduPs(stored_at))
        first = mmLoaduPs(stored_at)
        second = mmLoaduPs(stored_at)
        return mmCvtssf32(mmShufflePs(first, second, (0, 0, 0, position)))

    if position == 0:
        return mmCvtsdf64(mmLoaduPd(stored_at))
    first = mmLoaduPd(stored_at)
    second = mmLoaduPd(stored_at)
    return mmCvtsdf64(mmShufflePd(first, second, (0, position)))
def Mul(self, s0Params, s1Params, dParams, opts):
    '''
    Return the instruction list for the product of two nu-blocks (nu = 2).

    Keys read: 'nuM', 'nuML', 'nuMR' from s0Params, s1Params and dParams;
    'nuMM' and 'nuMN' from s0Params (M and K) and 'nuMN' from s1Params (N).
    ``opts`` is not used.

    The list always starts with a Comment describing the operation.  The
    remaining instructions depend on the shape:
      * M == 1, N == 1: mmHaddPd of the element-wise product and
        mmSetzeroPd(), one store.
      * M == 1, otherwise: the two vectors of the second operand are
        recombined with mmUnpackloPd / mmUnpackhiPd, one store.
      * M != 1, K == 1: the first operand is read with mmLoaddupPd, two
        stores.
      * M != 1, K != 1, N == 1: mmHaddPd of two products, one store.
      * anything else: unpack as above, two stores.
    '''
    width = 2
    lhs_block = s0Params['nuM']
    rhs_block = s1Params['nuM']
    out_block = dParams['nuM']
    lhs_left, lhs_right = s0Params['nuML'], s0Params['nuMR']
    rhs_left, rhs_right = s1Params['nuML'], s1Params['nuMR']
    out_left, out_right = dParams['nuML'], dParams['nuMR']
    rows = s0Params['nuMM']
    inner = s0Params['nuMN']
    cols = s1Params['nuMN']

    def lhs_ptr(idx):
        return Pointer(lhs_block[lhs_left.of(idx), lhs_right.of(0)])

    def load_rhs(idx):
        return mmLoaduPd(Pointer(rhs_block[rhs_left.of(idx), rhs_right.of(0)]))

    def out_ptr(idx):
        return Pointer(out_block[out_left.of(idx), out_right.of(0)])

    label = "".join([
        str(width), "-BLAC: ", str(rows), "x", str(inner), " * ", str(inner),
        "x", str(cols)
    ])
    program = [Comment(label)]

    if rows == 1 and cols == 1:
        lhs = mmLoaduPd(lhs_ptr(0))
        rhs = load_rhs(0)
        target = out_ptr(0)
        # NOTE(review): an mmStoreSd-based store was left commented out at
        # this point; the active code stores with mmStoreuPd.
        summed = mmHaddPd(mmMulPd(lhs, rhs), mmSetzeroPd())
        program.append(mmStoreuPd(summed, target))
    elif rows == 1:
        lhs = mmLoaduPd(lhs_ptr(0))
        rhs0 = load_rhs(0)
        rhs1 = load_rhs(1)
        rhs_lo = mmUnpackloPd(rhs0, rhs1)
        rhs_hi = mmUnpackhiPd(rhs0, rhs1)
        target = out_ptr(0)
        summed = mmHaddPd(mmMulPd(lhs, rhs_lo), mmMulPd(lhs, rhs_hi))
        program.append(mmStoreuPd(summed, target))
    elif inner == 1:
        lhs0 = mmLoaddupPd(lhs_ptr(0))
        lhs1 = mmLoaddupPd(lhs_ptr(1))
        rhs = load_rhs(0)
        target0 = out_ptr(0)
        target1 = out_ptr(1)
        store0 = mmStoreuPd(mmMulPd(lhs0, rhs), target0)
        store1 = mmStoreuPd(mmMulPd(lhs1, rhs), target1)
        program.extend([store0, store1])
    elif cols == 1:
        lhs0 = mmLoaduPd(lhs_ptr(0))
        lhs1 = mmLoaduPd(lhs_ptr(1))
        rhs = load_rhs(0)
        target = out_ptr(0)
        summed = mmHaddPd(mmMulPd(lhs0, rhs), mmMulPd(lhs1, rhs))
        program.append(mmStoreuPd(summed, target))
    else:
        lhs0 = mmLoaduPd(lhs_ptr(0))
        lhs1 = mmLoaduPd(lhs_ptr(1))
        rhs0 = load_rhs(0)
        rhs1 = load_rhs(1)
        rhs_lo = mmUnpackloPd(rhs0, rhs1)
        rhs_hi = mmUnpackhiPd(rhs0, rhs1)
        target0 = out_ptr(0)
        target1 = out_ptr(1)
        store0 = mmStoreuPd(
            mmHaddPd(mmMulPd(lhs0, rhs_lo), mmMulPd(lhs0, rhs_hi)), target0)
        store1 = mmStoreuPd(
            mmHaddPd(mmMulPd(lhs1, rhs_lo), mmMulPd(lhs1, rhs_hi)), target1)
        program.extend([store0, store1])

    return program