Beispiel #1
0
 def declare_gridref(self, grid: Grid, block: CodeBlock):
     name = self.gridref_name(grid)
     block.append(
         ("__global " if self.ocl else "") + "bElem *{} = &{};".format(
             name, self.layout.elem(grid, [0] *
                                    len(self.codegen.TILE_DIM))))
     return name
Beispiel #2
0
 def merge(self, rego, regl, regr, dim, shift, block: CodeBlock):
     block.append("// merge{} {} ,{}, {} -> {}".format(
         dim, regl, regr, shift, rego))
     if dim > 0:
         raise RuntimeError(
             "Cannot merge on dimension {} for SSE".format(dim))
     block.append("{} = _mm_alignr_epi8({}, {}, {});".format(
         rego, regr, regl, shift * self.prec * 4))
Beispiel #3
0
 def merge(self, rego, regl, regr, dim, shift, block: CodeBlock):
     block.append("// merge{} {} ,{}, {} -> {}".format(
         dim, regl, regr, shift, rego))
     if dim > 0:
         raise RuntimeError(
             "Cannot merge on dimension {} for SVE".format(dim))
     block.append("{} = svext_u32({}, {}, {});".format(
         rego, regl, regr, shift * self.prec))
Beispiel #4
0
 def declare_buf(self, buf: Buffer, block: CodeBlock):
     space = 1
     for a, b in buf.iteration:
         space *= b - a
     # block.append("vfloat{} {};".format(space, buf.name))
     align = self.layout.prec * 4 * space
     align = 64 if align >= 64 else align
     block.append("bElem {}[{}] __attribute__((aligned({})));".format(buf.name, space, align))
     return buf.name
Beispiel #5
0
 def read_aligned(self,
                  grid: Grid,
                  offset,
                  name: str,
                  block: CodeBlock,
                  rel=None):
     if rel is not None:
         rel = [rel]
     block.append("{} = {};".format(name,
                                    self.layout.elem(grid, offset, rel)))
Beispiel #6
0
 def read_aligned(self,
                  grid: Grid,
                  offset,
                  name: str,
                  block: CodeBlock,
                  rel=None):
     block.append("// read {} -> {}".format(str(offset), name))
     if rel is not None:
         rel = [rel]
     block.append("{} = _mm256_load_si256((__m256i *) & {});".format(
         name, self.layout.elem(grid, offset, rel)))
Beispiel #7
0
 def read_aligned(self,
                  grid: Grid,
                  offset,
                  name: str,
                  block: CodeBlock,
                  rel=None):
     block.append("// read {} -> {}".format(str(offset), name))
     if rel is not None:
         rel = [rel]
     block.append("{} = vld1q_u32((uint32_t *) & {});".format(
         name, self.layout.elem(grid, offset, rel)))
Beispiel #8
0
 def merge(self, rego, regl, regr, dim, shift, block: CodeBlock):
     block.append("// merge{} {} ,{}, {} -> {}".format(
         dim, regl, regr, shift, rego))
     l = 1
     for i in range(dim):
         l *= self.codegen.FOLD[i]
     ll = l * self.codegen.FOLD[dim]
     if ll == self.VECLEN:
         lid = self.LID
     else:
         lid = "{} & {}".format(self.LID, ll - 1)
     block.append("dev_shl({}, {}, {}, {}, {}, {});".format(
         rego, regl, regr, (self.codegen.FOLD[dim] - shift) * l,
         l * self.codegen.FOLD[dim], lid))
Beispiel #9
0
 def read_aligned(self,
                  grid: Grid,
                  offset,
                  name: str,
                  block: CodeBlock,
                  rel=None):
     import st.expr
     ref = [None] * len(offset)
     ref[-1] = st.expr.ConstRef(self.LID)
     if isinstance(rel, list):
         nrel = rel[:]
         if nrel[-1]:
             nrel[-1] += ref[-1]
         else:
             nrel[-1] = ref[-1]
         ref = nrel
     elif rel:
         ref[-1] = ref[-1] + rel * self.VECLEN
     block.append("{} = {};".format(name,
                                    self.layout.elem(grid, offset, ref)))
Beispiel #10
0
 def merge(self, rego, regl, regr, dim, shift, block: CodeBlock):
     block.append("// merge{} {} ,{}, {} -> {}".format(
         dim, regl, regr, shift, rego))
     if dim > 1:
         raise RuntimeError(
             "Cannot merge on dimension {} for AVX2".format(dim))
     if dim == 1:
         block.append("{} = _mm256_permute2x128_si256({}, {}, 3);".format(
             rego, regr, regl))
     elif dim == 0:
         block.append("{} = _mm256_alignr_epi8({}, {}, {});".format(
             rego, regr, regl, shift * self.prec * 4))
Beispiel #11
0
    def merge(self, rego, regl, regr, dim, shift, block: CodeBlock):
        block.append("// merge{} {} ,{}, {} -> {}".format(dim, regl, regr, shift, rego))
        l = 1

        for i in range(dim):
            l *= self.codegen.FOLD[i]
        if l * self.codegen.FOLD[dim] == self.VECLEN:
            # this only requires a shift
            sh = shift * l * self.prec
            block.append("{} = _mm512_alignr_epi32({}, {}, {});".format(rego, regr, regl, sh))
        else:
            # this requires masking
            sh = (self.VECLEN - (self.codegen.FOLD[dim] - shift) * l) * self.prec
            block.append("{} = _mm512_alignr_epi32({}, {}, {});".format(
                rego, regr, regr, sh))
            sh = shift * l * self.prec
            mask = genmask(self.codegen.FOLD, dim, shift, self.prec, self.VECLEN)
            block.append("{} = _mm512_mask_alignr_epi32({}, {}, {}, {}, {});".format(
                rego, rego, mask, regl, regl, sh))
Beispiel #12
0
 def declare_buf(self, buf: Buffer, block: CodeBlock):
     space = 1
     for a, b in buf.iteration:
         space *= b - a
     block.append("vfloat{} {};".format(space, buf.name))
     return buf.name
Beispiel #13
0
 def store_vecbuf(self, vecbuf_name, reg_name, block: CodeBlock):
     block.append("_mm256_store_si256((__m256i *) & {}[0], {});".format(
         reg_name, vecbuf_name))
Beispiel #14
0
 def declare_vec(self, name, block: CodeBlock):
     block.append("svuint32_t {};".format(name))
Beispiel #15
0
 def declare_reg(self, name, block: CodeBlock):
     block.append("vfloat{} {};".format(self.VECLEN, name))
Beispiel #16
0
 def genStoreLoop(self, group: CodeBlock):
     group.append("#pragma omp simd")
     group.append("for (long sti = 0; sti < {}; ++sti)".format(self.codegen.TILE_SIZE))
Beispiel #17
0
 def declare_gridref(self, grid: Grid, block: CodeBlock):
     name = self.gridref_name(grid)
     block.append("bElem *{} = &{};".format(
         name, self.layout.elem(grid, [0] * len(self.codegen.TILE_DIM))))
     block.append("{} = (bElem *)__builtin_assume_aligned({}, 64);".format(name, name))
     return name
Beispiel #18
0
 def declare_vec(self, name, block: CodeBlock):
     block.append("__m512i {};".format(name))
Beispiel #19
0
 def store(self, buf: Buffer, group: CodeBlock):
     group.append("{}[sti * {} + {}] = {}[sti];".format(
         self.gridref_name(buf.grid), self.VECLEN, self.LID, buf.name))
Beispiel #20
0
    def genStoreTileLoop(self, group: CodeBlock, dims):
        subblock = CodeBlock()
        group.append(subblock)
        subblock.append("long rel = 0;")
        for d in range(dims - 1, 0, -1):
            idx_name = self.index_name(d)
            subblock.append("for (long {} = {}; {} < {}; {} += {})".format(
                idx_name, 0, idx_name, self.codegen.TILE_DIM[d], idx_name, 1))
            newlevel = CodeBlock()
            subblock.append(newlevel)
            subblock = newlevel

        rel = self.index_name(0)
        subblock.append("for (long {} = {}; {} < {}; {} += {}, ++rel)".format(
            rel, self.LID, rel, self.codegen.TILE_DIM[0], rel, self.VECLEN))
        return subblock
Beispiel #21
0
 def storeTile(self, buf: Buffer, group: CodeBlock):
     dims = buf.grid.dims
     dimrels = [self.index_name(i) for i in reversed(range(dims))]
     group.append("{} = {}[rel];".format(
         self.layout.elem(buf.grid, [0] * dims, dimrels), buf.name))
Beispiel #22
0
 def genStoreLoop(self, group: CodeBlock):
     group.append("for (long sti = 0; sti < {}; ++sti)".format(
         self.codegen.TILE_SIZE // self.VECLEN))
Beispiel #23
0
 def store_vecbuf(self, vecbuf_name, reg_name, block: CodeBlock):
     block.append("_mm512_store_epi32( & {}[0], {});".format(reg_name, vecbuf_name))
Beispiel #24
0
 def declare_vec(self, name, block: CodeBlock):
     block.append("bElem {};".format(name))
Beispiel #25
0
 def store_vecbuf(self, vecbuf_name, reg_name, block: CodeBlock):
     block.append("vst1q_u32((uint32_t *) & {}[0], {});".format(
         reg_name, vecbuf_name))
Beispiel #26
0
 def store_vecbuf(self, vecbuf_name, reg_name, block: CodeBlock):
     block.append("{} = {};".format(reg_name, vecbuf_name))
Beispiel #27
0
 def genVectorLoop(self, group: CodeBlock):
     group.append("#pragma omp simd")
     group.append("for (long vit = 0; vit < {}; ++vit)".format(self.VECLEN))
     g = CodeBlock()
     group.append(g)
     return g
Beispiel #28
0
 def declare_buf(self, buf: Buffer, block: CodeBlock):
     space = 1
     for a, b in buf.iteration:
         space *= b - a
     block.append("bElem {}[{}];".format(buf.name, space // self.VECLEN))
     return buf.name
Beispiel #29
0
 def declare_reg(self, name, block: CodeBlock):
     align = self.layout.prec * 4 * self.VECLEN
     align = 64 if align >= 64 else align
     block.append("bElem {}[{}] __attribute__((aligned({})));".format(name, self.VECLEN, align))
Beispiel #30
0
 def store_vecbuf(self, vecbuf_name, reg_name, block: CodeBlock):
     block.append(
         "svst1_u32(svptrue_b32(), (uint32_t *) & {}[0], {});".format(
             reg_name, vecbuf_name))