Beispiel #1
0
 def gen_local_macro(self):
     dim = len(self.output_grid.shape)
     index = SymbolRef("d%d" % (dim - 1))
     for d in reversed(range(dim - 1)):
         base = Add(get_local_size(dim - 1),
                    Constant(2 * self.ghost_depth[dim - 1]))
         for s in range(d + 1, dim - 1):
             base = Mul(
                 base,
                 Add(get_local_size(s), Constant(2 * self.ghost_depth[s]))
             )
         index = Add(
             index, Mul(base, SymbolRef("d%d" % d))
         )
         index._force_parentheses = True
         index.right.right._force_parentheses = True
     return index
Beispiel #2
0
 def local_array_macro(self, point):
     dim = len(self.output_grid.shape)
     index = get_local_id(dim)
     for d in reversed(range(dim)):
         index = Add(
             Mul(
                 index,
                 Add(
                     get_local_size(d),
                     Constant(2 * self.ghost_depth[d])
                 ),
             ),
             point[d]
         )
     return FunctionCall(SymbolRef("local_array_macro"), point)
Beispiel #3
0
def gen_decls(dim, ghost_depth):
    thread_id = get_local_id(dim - 1)
    num_threads = get_local_size(dim - 1)
    block_size = Add(
        get_local_size(dim - 1),
        Constant(ghost_depth[dim - 1] * 2)
    )
    for d in reversed(range(0, dim - 1)):
        base = get_local_size(dim - 1)
        for s in range(d, dim - 2):
            base = Mul(get_local_size(s + 1), base)

        thread_id = Add(
            Mul(get_local_id(d), base),
            thread_id
        )
        num_threads = Mul(get_local_size(d), num_threads)
        block_size = Mul(
            Add(get_local_size(d), Constant(ghost_depth[d] * 2)),
            block_size
        )
    return thread_id, num_threads, block_size
Beispiel #4
0
    def load_shared_memory_block(self, target, ghost_depth):
        dim = len(self.output_grid.shape)
        body = []
        thread_id, num_threads, block_size = gen_decls(dim, ghost_depth)

        body.extend([Assign(SymbolRef("thread_id", ct.c_int()), thread_id),
                     Assign(SymbolRef("block_size", ct.c_int()), block_size),
                     Assign(SymbolRef("num_threads", ct.c_int()), num_threads)
                     ])
        base = None
        for i in reversed(range(0, dim - 1)):
            if base is not None:
                base = Mul(Add(get_local_size(i + 1),
                               Constant(self.ghost_depth[i + 1] * 2)),
                           base)
            else:
                base = Add(get_local_size(i + 1),
                           Constant(self.ghost_depth[i + 1] * 2))
        if base is not None:
            local_indices = [
                Assign(
                    SymbolRef("local_id%d" % (dim - 1), ct.c_int()),
                    Div(SymbolRef('tid'), base)
                ),
                Assign(
                    SymbolRef("r_%d" % (dim - 1), ct.c_int()),
                    Mod(SymbolRef('tid'), base)
                )
            ]
        else:
            local_indices = [
                Assign(
                    SymbolRef("local_id%d" % (dim - 1), ct.c_int()),
                    SymbolRef('tid')
                ),
                Assign(
                    SymbolRef("r_%d" % (dim - 1), ct.c_int()),
                    SymbolRef('tid')
                )
            ]
        for d in reversed(range(0, dim - 1)):
            base = None
            for i in reversed(range(d + 1, dim)):
                if base is not None:
                    base = Mul(
                        Add(get_local_size(i),
                            ghost_depth[i] * 2),
                        base
                    )
                else:
                    base = Add(get_local_size(i), Constant(ghost_depth[i] * 2))
            if base is not None and d != 0:
                local_indices.append(
                    Assign(
                        SymbolRef("local_id%d" % d, ct.c_int()),
                        Div(SymbolRef('r_%d' % (d + 1)), base)
                    )
                )
                local_indices.append(
                    Assign(
                        SymbolRef("r_%d" % d, ct.c_int()),
                        Mod(SymbolRef('r_%d' % (d + 1)), base)
                    )
                )
            else:
                local_indices.append(
                    Assign(
                        SymbolRef("local_id%d" % d, ct.c_int()),
                        SymbolRef('r_%d' % (d + 1))
                    )
                )
        body.append(
            For(
                Assign(SymbolRef('tid', ct.c_int()), SymbolRef('thread_id')),
                Lt(SymbolRef('tid'), SymbolRef('block_size')),
                AddAssign(SymbolRef('tid'), SymbolRef('num_threads')),
                local_indices + [Assign(
                    ArrayRef(
                        target,
                        SymbolRef('tid')
                    ),
                    ArrayRef(
                        SymbolRef(self.input_names[0]),
                        self.global_array_macro(
                            [FunctionCall(
                                SymbolRef('clamp'),
                                [Cast(ct.c_int(), Sub(Add(
                                    SymbolRef("local_id%d" % (dim - d - 1)),
                                    Mul(FunctionCall(
                                        SymbolRef('get_group_id'),
                                        [Constant(d)]),
                                        get_local_size(d))
                                ), Constant(self.kernel.ghost_depth[d]))),
                                    Constant(0), Constant(
                                        self.arg_cfg[0].shape[d]-1
                                    )
                                ]
                            ) for d in range(0, dim)]
                        )
                    )
                )]
            )
        )
        return body