예제 #1
0
    def code(self, code_block=None):
        """Append this node's ``gen_mat_mul_sq8`` generator call to a code block.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        if self.gen_ctrl.is_unmodified:
            # Nothing to declare: the literal "0" stands in for a control name.
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_mat_mul_sq8(block, self.cname, ctrl_ref,
                        self.colM1, self.lineM1,
                        self.colM2, self.lineM2,
                        self.at_act_params)
        return block
    def code(self, code_block=None):
        """Append this node's ``gen_mat_vect_mul_sq8`` generator call.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        out = code_block if code_block is not None else CodeBlock()
        out.comment("generator for {}", self.node_name)

        # Default to the literal "0"; it is replaced by the declared control
        # name only when the control block carries modifications.
        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(out)
            ctrl_ref = self.gen_ctrl.ctrl_name

        relu_oper = self.at_act_params.ReLUOper
        gen_mat_vect_mul_sq8(out, self.cname, ctrl_ref, self.feat_dim,
                             self.width, self.height, relu_oper)
        return out
예제 #3
0
    def code(self, code_block=None):
        """Append this node's ``gen_at_softmax`` generator call to a code block.

        The control declaration is emitted first when the node's generator
        control has been modified.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_softmax(block, self.cname,
                       self.in_q, self.out_q,
                       self.in_dim, self.at_softmax_params,
                       at_ver=self.at_ver)
        return block
예제 #4
0
    def code(self, code_block=None):
        """Append this node's ``gen_cnn_globalpool_fp16`` generator call.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = code_block if code_block is not None else CodeBlock()
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if ctrl.is_unmodified:
            # No control declaration is emitted; "0" is passed instead of a name.
            ctrl_ref = "0"
        else:
            ctrl.gen_ctrl_decl(block)
            ctrl_ref = ctrl.ctrl_name

        pool_oper = self.at_globalpool_params.GlobalPoolOper
        relu_oper = self.at_act_params.ReLUOper
        gen_cnn_globalpool_fp16(block, self.cname, ctrl_ref,
                                self.c, self.h, self.w,
                                pool_oper, relu_oper)
        return block
예제 #5
0
    def code(self, code_block=None):
        """Append this node's ``gen_rnn_sq8`` generator call to a code block.

        ``self.revert`` is passed to the generator as the string "1" when
        truthy and "0" otherwise.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        revert_flag = "1" if self.revert else "0"
        gen_rnn_sq8(block, self.kname, self.cname, ctrl_ref,
                    self.n_cells, self.n_input_cells, self.n_output_cells,
                    self.n_states, self.n_inputs, revert_flag)
        return block
    def code(self, code_block=None):
        """Append this node's ``gen_at_linear_relu`` generator call.

        The bias and mul-bias widths are passed as ``bits // 8`` of their
        respective quantization records.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        out = code_block if code_block is not None else CodeBlock()
        out.comment("generator for {}", self.node_name)

        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            # Declare the control block and refer to it by name in the call.
            self.gen_ctrl.gen_ctrl_decl(out)
            ctrl_ref = self.gen_ctrl.ctrl_name

        bias_size = self.bias_q.bits // 8
        mulbias_size = self.mulbiases_q.bits // 8
        gen_at_linear_relu(out, self.cname, bias_size, mulbias_size,
                           self.in_dim, self.out_dim,
                           self.at_linear_params.LinearOper,
                           self.at_act_params.ReLUOper,
                           at_ver=self.at_ver, gen_ctrl=ctrl_ref)
        return out
예제 #7
0
    def code(self, code_block=None):
        """Append this node's ``gen_at_matrixadddyn`` generator call.

        The control object itself (not its name) is handed to the generator.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_matrixadddyn(block, self.cname,
                            self.in_q1, self.in_q2, self.out_q,
                            self.in_dim, self.out_dim,
                            self.at_matrixadd_params,
                            gen_ctrl=self.gen_ctrl)
        return block
    def code(self, code_block=None):
        """Write a one-line generator call of the form ``<gen_name>("<cname>");``.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)
        call_line = f'{self.gen_name}("{self.cname}");'
        block.write(call_line)
        return block
예제 #9
0
    def code(self, code_block=None):
        """Write the ``CNN_SignedUnsigned`` generator call for this node.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = code_block if code_block is not None else CodeBlock()
        block.comment("generator for {}", self.node_name)
        call_line = 'CNN_SignedUnsigned("{}", {}, {}, {});'.format(
            self.cname, self.in_type, self.out_type, self.size)
        block.write(call_line)
        return block
    def code(self, code_block=None):
        """Append this node's ``gen_at_linear_relu`` generator call.

        This variant passes the quantization records and parameter objects
        directly, along with the control object itself.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = CodeBlock() if code_block is None else code_block
        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_linear_relu(block, self.cname,
                           self.in_q, self.out_q,
                           self.filter_q, self.bias_q,
                           self.in_dim, self.out_dim,
                           self.at_linear_params, self.at_act_params,
                           at_ver=self.at_ver,
                           gen_ctrl=self.gen_ctrl)
        return block
예제 #11
0
 def c_block(self, code_block: CodeBlock = None, iteration_space: IterationSpace = None):
     """Write a single C assignment of ``self.contents[0]`` to ``self.var``.

     Without an iteration space the target is the bare variable name. With
     one, the target is either a declaration (when the variable is listed
     in ``iteration_space.temporary_variables``) or the variable expression
     followed by ``iteration_space.c_index(...)``.

     Args:
         code_block: Block to write into. A new ``CodeBlock`` is created
             when this is ``None``.
         iteration_space: Optional iteration space used to render both sides.

     Returns:
         The code block that was written to.
     """
     if code_block is None:
         code_block = CodeBlock()
     value = self.contents[0]

     if not iteration_space:
         code_block.write(f'{self.var.name} = {value.c_expr()};')
         return code_block

     # The target is rendered before the value, matching left-to-right
     # evaluation of the emitted statement.
     if self.var.name in iteration_space.temporary_variables:
         target = self.var.c_expr(declare=True, dtype=value.dtype)
     else:
         target = (f"{self.var.c_expr(dtype=value.dtype)}"
                   f"{iteration_space.c_index(self.var.name)}")
     rendered_value = value.c_expr(iteration_space=iteration_space)
     code_block.write(f"{target} = {rendered_value};")
     return code_block
예제 #12
0
 def c_block(self, code_block: CodeBlock = None, iteration_space: IterationSpace = None, with_loops=False):
     """Write the preconditions and contents, optionally wrapped in C loops.

     When ``with_loops`` is set, one ``for`` header is opened for every
     axis of ``iteration_space.axis_shape`` whose index is not in
     ``iteration_space.fixed_spaces``, and one closing brace is written
     per such axis after the body.

     Args:
         code_block: Block to write into. A new ``CodeBlock`` is created
             when this is ``None``.
         iteration_space: Iteration space forwarded to every child; required
             (asserted) when ``with_loops`` is true.
         with_loops: Whether to emit the surrounding loop nest.

     Returns:
         The code block that was written to.
     """
     if code_block is None:
         code_block = CodeBlock()

     def looped_axes():
         # Lazily yield the indexes of the axes that get their own loop.
         for axis, _ in enumerate(iteration_space.axis_shape):
             if axis not in iteration_space.fixed_spaces:
                 yield axis

     if with_loops:
         assert iteration_space, "must have space"
         for axis in looped_axes():
             code_block.write(f"{iteration_space.c_for(axis)} {{")
             code_block.indent()

     for pre in self.preconditions:
         pre.c_block(code_block=code_block, iteration_space=iteration_space)
     for child in self.contents:
         child.c_block(code_block=code_block, iteration_space=iteration_space)

     if with_loops:
         for _ in looped_axes():
             code_block.deindent()
             code_block.write("}")
     return code_block
    def code(self, code_block=None):
        """Append this node's ``gen_at_3d_transpose`` generator call.

        Two comments are written first: the node name, and the input/output
        dimensions together with ``self.real_transpose``.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)
        block.comment("transpose from {} to {} ({})",
                      self.in_dim, self.out_dim, self.real_transpose)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_3d_transpose(block, self.cname, self.in_shape, self.permop)
        return block
예제 #14
0
    def code(self, code_block=None):
        """Append this node's ``gen_at_2d_transpose`` generator call.

        The element width passed to the generator is
        ``abs(at_bits(self.in_q))``.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        block = code_block if code_block is not None else CodeBlock()

        block.comment("generator for {}", self.node_name)
        block.comment("transpose from {} to {} ({})",
                      self.in_dim, self.out_dim, self.real_transpose)

        if self.gen_ctrl.is_unmodified:
            # No declaration emitted; "0" is passed in place of a control name.
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        bit_width = abs(at_bits(self.in_q))
        gen_at_2d_transpose(block, self.cname, bit_width, self.in_shape,
                            gen_ctrl=ctrl_ref)
        return block
예제 #15
0
    def __init__(self, node_name, cname, pool_params, pool_q,
                 act_params, act_q, code_block=None, at_ver=3, gen_ctrl=None):
        """Collect what is needed to generate a pool (+ optional activation) kernel.

        Args:
            node_name: Name of the graph node; stored as ``self.node_name``.
            cname: C name of the kernel; stored and also set on the control
                object.
            pool_params: Pooling parameters, or ``None``.
            pool_q: Quantization record for the pool; its first in/out
                entries are used when ``pool_params`` is given.
            act_params: Activation parameters, or ``None``.
            act_q: Quantization record for the activation; used when
                ``act_params`` is given.
            code_block: Unused here; accepted only to keep the signature
                unchanged for existing callers.
            at_ver: Version switch selecting how relu6 / relun are recorded
                on the control object.
            gen_ctrl: Existing control object to reuse. When ``None`` a new
                ``GenCtrl(None, cname=cname)`` is created.

        Raises:
            NotImplementedError: If no pooling parameters were supplied, or
                if the input and output quantization bit widths differ.
        """
        if gen_ctrl is None:
            self.gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl

        in_q = out_q = None
        in_dim = out_dim = None
        pad_compatibilities = []

        if pool_params is not None:
            at_pool_params = gen_pool_at_params(pool_params, pad_compatibilities)
            # The pool is the first stage, so it always defines the input side.
            in_dim = pool_params.in_dims[0]
            out_dim = pool_params.out_dims[0]
            in_q = pool_q.in_qs[0]
            out_q = pool_q.out_qs[0]
        else:
            at_pool_params = NO_POOL

        if act_params is not None:
            at_act_params = gen_active_at_params(act_params)
            # Only fill in what the pool stage did not already provide.
            if in_dim is None:
                in_dim = act_params.in_dims[0]
            if out_dim is None:
                out_dim = act_params.out_dims[0]
            if in_q is None:
                in_q = act_q.in_qs[0]
            # The activation is the last stage, so it defines the output type.
            out_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and out_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << out_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            else:
                if act_params.activation == "relun":
                    self.gen_ctrl.ReluN = act_params.activation_params
        else:
            at_act_params = NO_ACTIVATION

        if pad_compatibilities:
            reduction = PadDim.pad_compatibility_reduce(*pad_compatibilities,
                                                        "convolution padding is not compatible with pool padding")
            if not reduction[2]:  # default is balanced pad left
                at_pad_ctrl = next(i for i, v in enumerate(reduction) if v)
                self.gen_ctrl.PadType = at_pad_ctrl

        # Check for a missing pool before touching in_q / out_q: with neither
        # pool nor activation supplied both are still None, and reading
        # ``.bits`` would raise AttributeError instead of the intended error.
        if at_pool_params == NO_POOL:
            raise NotImplementedError(
                "activation layer on its own should not be matched by this kernel")
        if in_q.bits != out_q.bits:
            raise NotImplementedError("only homogenious operations are supported at present")

        self.at_pool_params = at_pool_params
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.in_q = in_q
        self.out_q = out_q
        self.at_act_params = at_act_params
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver
예제 #16
0
 def gen_user_kernel(self, ukname: str, kname: str, code: CodeBlock = None):
     """Write a C function ``int <ukname>(char *Name)`` that builds a user kernel.

     The emitted function calls ``UserKernel(...)`` with the iteration
     space, the C arguments, a single call to ``kname`` with its bindings,
     and the kernel arguments, then writes every entry of
     ``self.gen_kinfos()`` inside an ``if (Kernel)`` block.

     Args:
         ukname: Name of the emitted C function.
         kname: Name of the kernel referenced by the ``Call(...)`` entry.
         code: Block to write into. A new ``CodeBlock`` is created when
             this is ``None``.

     Returns:
         The code block that was written to.
     """
     if code is None:
         code = CodeBlock()

     def write_items(items):
         # One item per line; every item except the last gets a trailing comma.
         for item in items[:-1]:
             code.write("{0},", item)
         code.write("{0}", items[-1])

     code.write("int {0}(char *Name) {{", ukname)
     code.indent()
     code.write("Kernel_T *Kernel = UserKernel(")
     code.indent()
     code.write("Name,")
     code.write("{0},", self.gen_iterspace())
     kargs = self.gen_kargs()
     code.write("TILE_HOR,")
     cargs = self.gen_cargs()
     code.write("CArgs({0},", len(cargs))
     code.indent()
     write_items(cargs)
     code.deindent()
     code.write("),")
     code.write("Calls(1,")
     code.indent()
     code.write("Call(\"{0}\", LOC_D{1},", kname,
                len(self.func_col.iterators) - 1)
     code.indent()
     bindings = self.gen_kerbingings()
     code.write("Bindings({0},", len(bindings))
     code.indent()
     write_items(bindings)
     code.deindent()
     code.write(")")
     code.deindent()
     code.write(")")
     code.deindent()
     code.write("),")
     # The count must describe the list that follows (kargs, not cargs).
     code.write("KerArgs({0},", len(kargs))
     code.indent()
     write_items(kargs)
     code.deindent()
     code.write(")")
     code.deindent()
     code.write(");")
     code.write("if (Kernel) {")
     code.indent()
     for kinfo in self.gen_kinfos():
         code.write("{0};", kinfo)
     code.deindent()
     code.write("}")
     code.write("return (Kernel!=0);")
     code.deindent()
     code.write("}")
     return code
예제 #17
0
 def gen_kernel_headers(self, code: CodeBlock = None):
     """Write one ``#include`` line per entry of ``self._func_col.c_header_set``.

     Args:
         code: Block to write into. A new ``CodeBlock`` is created when
             this is ``None``.

     Returns:
         The code block that was written to.
     """
     out = CodeBlock() if code is None else code
     for header in self._func_col.c_header_set:
         out.write('#include {}', header)
     return out
예제 #18
0
    def gen_function(self, kernel_name, kernel_arg_type_name, code=None):
        """Write the C kernel function that unpacks ``Args`` and runs the kernel.

        The emitted body copies each kernel argument out of ``Args``, derives
        the ``First``/``Last`` bounds for the current core from the
        upper-cased name of ``self.parallel_iterator``, delegates the loop
        nest to ``self._func_col.create_kernel`` and ends with a barrier.

        Args:
            kernel_name: Name of the emitted C function.
            kernel_arg_type_name: C type name of the ``Args`` pointer.
            code: Block to write into. A new ``CodeBlock`` is created when
                this is ``None``.

        Returns:
            The code block that was written to.
        """
        out = CodeBlock() if code is None else code

        out.comment("Output iteration space reduced to %s iteration spaces" %
                    (self.kernel_dims))
        out.write(f"void {kernel_name}({kernel_arg_type_name} *Args) {{")
        out.indent()
        for arg_name, arg_type in self.kernel_args:
            out.write('{0} {1} = Args->{1};', arg_type, arg_name)

        # Work is split across cores along the parallel iterator's dimension.
        extent = self.parallel_iterator.name.upper()
        out.write('unsigned int CoreId = gap_coreid();')
        out.write('unsigned int Chunk = ChunkSize({});', extent)
        out.write('unsigned int First = Chunk*CoreId;')
        out.write('unsigned int Last = gap_min(First+Chunk, {});', extent)

        self._func_col.create_kernel(self.parallel_iterator,
                                     self.fixed_iterators, out)

        out.write('gap_waitbarrier(0);')
        out.deindent()
        out.write('}')
        return out
예제 #19
0
    def code(self, code_block=None):
        """Append the conv / pool / relu generator call matching this node.

        The generator is chosen from ``self.at_conv_params``:

        * ``NO_CONV``: ``gen_at_pool_relu``.
        * ``ConvATParam``: ``gen_at_mulconv_pool_relu`` when
          ``self.mul_biases_q`` is set, otherwise ``gen_at_conv_pool_relu``.
        * ``GroupedConvATParam``: ``gen_at_grouped_mulconv_pool_relu`` when
          ``self.mul_biases_q`` is set, otherwise
          ``gen_at_grouped_conv_pool_relu``.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.

        Raises:
            NotImplementedError: For a pool-only node whose input and output
                bit widths differ.
            ValueError: If ``self.at_conv_params`` is of an unexpected type.
        """
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)

        if self.at_conv_params == NO_CONV:
            if self.in_q.bits != self.out_q.bits:
                raise NotImplementedError(
                    "only homogenious operations are supported at present")
            LOG.debug("%s: pool relu inq %s outq %s control block",
                      self.node_name, self.in_q, self.out_q)
            gen_at_pool_relu(code_block,
                             self.cname,
                             self.in_q,
                             self.out_q,
                             self.in_dim,
                             self.out_dim,
                             self.at_pool_params,
                             self.at_act_params,
                             gen_ctrl=self.gen_ctrl,
                             at_ver=self.at_ver)
        else:
            if isinstance(self.at_conv_params, ConvATParam):
                if self.mul_biases_q is not None:
                    LOG.debug(
                        "%s: mulconv pool relu inq %s outq %s control block",
                        self.node_name, self.in_q, self.out_q)
                    gen_at_mulconv_pool_relu(code_block,
                                             self.cname,
                                             self.in_q,
                                             self.out_q,
                                             self.filter_q,
                                             self.bias_q,
                                             self.mul_biases_q,
                                             self.in_dim,
                                             self.out_dim,
                                             self.at_conv_params,
                                             self.at_pool_params,
                                             self.at_act_params,
                                             gen_ctrl=self.gen_ctrl,
                                             at_ver=self.at_ver)
                else:
                    LOG.debug(
                        "%s: conv pool relu inq %s outq %s control block",
                        self.node_name, self.in_q, self.out_q)
                    gen_at_conv_pool_relu(code_block,
                                          self.cname,
                                          self.in_q,
                                          self.out_q,
                                          self.filter_q,
                                          self.bias_q,
                                          self.in_dim,
                                          self.out_dim,
                                          self.at_conv_params,
                                          self.at_pool_params,
                                          self.at_act_params,
                                          gen_ctrl=self.gen_ctrl,
                                          at_ver=self.at_ver)
            elif isinstance(self.at_conv_params, GroupedConvATParam):
                if self.mul_biases_q is not None:
                    # Log text names the mulconv generator that is called here.
                    LOG.debug(
                        "%s: grouped mulconv pool relu inq %s outq %s control block",
                        self.node_name, self.in_q, self.out_q)
                    gen_at_grouped_mulconv_pool_relu(code_block,
                                                     self.cname,
                                                     self.in_q,
                                                     self.out_q,
                                                     self.filter_q,
                                                     self.bias_q,
                                                     self.mul_biases_q,
                                                     self.in_dim,
                                                     self.out_dim,
                                                     self.at_conv_params,
                                                     self.at_pool_params,
                                                     self.at_act_params,
                                                     gen_ctrl=self.gen_ctrl,
                                                     at_ver=self.at_ver)
                else:
                    # Log text names the plain grouped conv generator.
                    LOG.debug(
                        "%s: grouped conv pool relu inq %s outq %s control block",
                        self.node_name, self.in_q, self.out_q)
                    gen_at_grouped_conv_pool_relu(code_block,
                                                  self.cname,
                                                  self.in_q,
                                                  self.out_q,
                                                  self.filter_q,
                                                  self.bias_q,
                                                  self.in_dim,
                                                  self.out_dim,
                                                  self.at_conv_params,
                                                  self.at_pool_params,
                                                  self.at_act_params,
                                                  gen_ctrl=self.gen_ctrl,
                                                  at_ver=self.at_ver)
            else:
                raise ValueError('Internal error')

        return code_block
예제 #20
0
 def gen_user_kernel(self, block: NNToolCodeBlock, ukname: str, kname: str):
     """Write a C function ``int <ukname>(char *Name)`` that builds a user kernel.

     The emitted function calls ``UserKernel(...)`` with the iteration
     space, the C arguments, a single ``LOC_LOOP`` call to ``kname`` with
     its bindings, and the kernel arguments, then writes every entry of
     ``self.gen_kinfos()`` inside an ``if (Kernel)`` block.

     Args:
         block: Code block to write into.
         ukname: Name of the emitted C function.
         kname: Name of the kernel referenced by the ``Call(...)`` entry.
     """
     def write_items(items):
         # One item per line; every item except the last gets a trailing comma.
         for item in items[:-1]:
             block.write("{0},", item)
         block.write("{0}", items[-1])

     block.write("int {0}(char *Name) {{", ukname)
     block.indent()
     block.write("Kernel_T *Kernel = UserKernel(")
     block.indent()
     block.write("Name,")
     block.write("{0},", self.gen_iterspace())
     block.write("TILE_HOR,")
     cargs = self.gen_cargs()
     block.write("CArgs({0},", len(cargs))
     block.indent()
     write_items(cargs)
     block.deindent()
     block.write("),")
     block.write("Calls(1,")
     block.indent()
     block.write("Call(\"{0}\", LOC_LOOP,", kname)
     block.indent()
     bindings = self.gen_kerbingings()
     block.write("Bindings({0},", len(bindings))
     block.indent()
     write_items(bindings)
     block.deindent()
     block.write(")")
     block.deindent()
     block.write(")")
     block.deindent()
     block.write("),")
     kargs = self.gen_kargs()
     # The count must describe the list that follows (kargs, not cargs).
     block.write("KerArgs({0},", len(kargs))
     block.indent()
     write_items(kargs)
     block.deindent()
     block.write(")")
     block.deindent()
     block.write(");")
     # NOTE(review): the doubled braces below are passed without format
     # arguments; whether they collapse to single braces depends on
     # NNToolCodeBlock.write, which is not visible here. Left unchanged.
     block.write("if (Kernel) {{")
     block.indent()
     for kinfo in self.gen_kinfos():
         block.write("{0};", kinfo)
     block.deindent()
     block.write("}}")
     block.write("return (Kernel!=0);")
     block.deindent()
     block.write("}}")
예제 #21
0
    def create_kernel(self, code_block=None):
        """Write the nested C ``for`` loops and the function bodies inside them.

        For every variable in ``self._freevars`` the largest index among its
        own indexes and those of its free variables is computed (0 when there
        are none) and handed to ``self.produce_functions`` at each depth,
        first on the way into the loop nest and again on the way out.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.
        """
        if code_block is None:
            code_block = CodeBlock()
        order = self.execution_order

        deepest_index = {}
        for var_name, free_vars in self._freevars.items():
            indexes = {
                index
                for free_var in free_vars
                for index in self._variable_indexes[free_var]
            }
            indexes.update(self._variable_indexes[var_name])
            deepest_index[var_name] = max(indexes) if indexes else 0

        dims = self.kernel_dims
        produced = 0
        if not dims:
            produced = self.produce_functions(produced, order, deepest_index,
                                              0, code_block)
        else:
            # Loop header per depth: (format string, extra format arguments).
            # Depths beyond 2 get no header of their own.
            headers = {
                0: ('for ({0}=First; {1}<Last; {1}++) {{', ()),
                1: ('for ({0}=0; {1}<{2}; {1}++) {{',
                    ("W" if dims == 2 else "H",)),
                2: ('for ({0}=0; {1}<W; {1}++) {{', ()),
            }
            for depth in range(dims):
                header = headers.get(depth)
                if header is not None:
                    fmt, extra = header
                    iterator = self.iterators[depth]
                    code_block.write(fmt,
                                     iterator.c_expr(declare=True),
                                     iterator.c_expr(),
                                     *extra)
                code_block.indent()
                produced = self.produce_functions(produced, order,
                                                  deepest_index, depth,
                                                  code_block)
            for depth in reversed(range(dims)):
                produced = self.produce_functions(produced, order,
                                                  deepest_index, depth,
                                                  code_block)
                code_block.deindent()
                code_block.write('}}')

        code_block.write('')
        assert produced >= len(order)
        return code_block
    def code(self, code_block=None):
        """Append the fp16 conv / pool / activation generator call for this node.

        * ``NO_CONV``: ``gen_cnn_conv_pool_act_fp16`` is called with
          ``"KOP_NONE"`` and zeros in the convolution slots.
        * ``ConvATParam``: ``gen_cnn_conv_pool_act_fp16`` with the
          convolution parameters.
        * ``GroupedConvATParam``: ``gen_cnn_grp_conv_pool_act_fp16``, which
          additionally receives ``GroupIn`` and ``GroupOut``.

        Args:
            code_block: Block to write into. A new ``CodeBlock`` is created
                when this is ``None``.

        Returns:
            The code block that was written to.

        Raises:
            ValueError: If ``self.at_conv_params`` is of an unexpected type.
        """
        block = code_block if code_block is not None else CodeBlock()
        block.comment("generator for {}", self.node_name)

        if self.gen_ctrl.is_unmodified:
            ctrl_ref = "0"
        else:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        def dims():
            # Channel counts followed by input width and height.
            return (self.in_dim.c, self.out_dim.c,
                    self.in_dim.w, self.in_dim.h)

        def pool_act_tail(pool, act):
            # Pool description, activation operation and control reference:
            # the trailing positional arguments shared by every call below.
            return (pool.PoolOper, pool.Fpx, pool.Fpy, pool.Dpx, pool.Dpy,
                    pool.Spx, pool.Spy, pool.PoolPad, act.ReLUOper, ctrl_ref)

        if self.at_conv_params == NO_CONV:
            pool = self.at_pool_params
            act = self.at_act_params
            gen_cnn_conv_pool_act_fp16(
                block,
                self.cname,
                *dims(),
                self.bias_q.dtype_bits // 8 if self.bias_q is not None else 0,
                "KOP_NONE", 0, 0, 0, 0, 0, 0, 0,
                *pool_act_tail(pool, act),
                at_ver=self.at_ver)
            return block

        conv = self.at_conv_params
        pool = self.at_pool_params
        act = self.at_act_params
        if isinstance(self.at_conv_params, ConvATParam):
            LOG.debug("%s: conv pool relu inq %s outq %s control block",
                      self.node_name, self.in_q, self.out_q)
            generator = gen_cnn_conv_pool_act_fp16
            group_args = ()
        elif isinstance(self.at_conv_params, GroupedConvATParam):
            LOG.debug(
                "%s: grouped mulconv pool relu inq %s outq %s control block",
                self.node_name, self.in_q, self.out_q)
            generator = gen_cnn_grp_conv_pool_act_fp16
            group_args = (conv.GroupIn, conv.GroupOut)
        else:
            raise ValueError('Internal error')

        generator(
            block,
            self.cname,
            *group_args,
            *dims(),
            self.bias_q.dtype_bits // 8,
            conv.ConvOper, conv.Fcx, conv.Fcy, conv.Dcx, conv.Dcy,
            conv.Scx, conv.Scy, conv.ConvPad,
            *pool_act_tail(pool, act),
            at_ver=self.at_ver)
        return block
예제 #23
0
 def kernel_arg_type_codegen(self, type_name, code=None):
     """Write a C ``typedef struct`` with one field per kernel argument.

     Args:
         type_name: Name given to the typedef.
         code: Block to write into. A new ``CodeBlock`` is created when
             this is ``None``.

     Returns:
         The code block that was written to.
     """
     out = CodeBlock() if code is None else code
     out.write('typedef struct {')
     out.indent()
     # kernel_args holds (name, type) pairs; C wants the type first.
     for field_name, field_type in self.kernel_args:
         out.write('{} {};', field_type, field_name)
     out.deindent()
     out.write('}} {};', type_name)
     return out
예제 #24
0
    def gen_kernel_model(self, kernel_name, kernel_arg_type_name, code=None):
        """Write the ``LibKernelTemplate`` and ``LibKernel`` declarations.

        The template lists one ``TCArg`` per kernel argument; the kernel
        entry references the same argument type name.

        Args:
            kernel_name: Name passed to ``LibKernel``.
            kernel_arg_type_name: Argument struct type name used by both
                declarations.
            code: Block to write into. A new ``CodeBlock`` is created when
                this is ``None``.

        Returns:
            The code block that was written to.
        """
        out = CodeBlock() if code is None else code

        out.write('LibKernelTemplate(')
        out.indent()
        out.write('"{}",', kernel_arg_type_name)
        out.write('CArgs({},', len(self.kernel_args))
        out.indent()
        for pos, (arg_name, arg_type) in enumerate(self.kernel_args):
            # Every entry except the last is followed by a comma.
            is_last = pos >= len(self.kernel_args) - 1
            separator = '' if is_last else ","
            out.write('TCArg("{}", "{}"){}', arg_type, arg_name, separator)
        out.deindent()
        out.write(')')
        out.deindent()
        out.write(');')

        out.write('LibKernel(')
        out.indent()
        out.write('"{}",', kernel_name)
        out.write('CALL_PARALLEL,')
        out.write('0,')
        out.write('"{}",', kernel_arg_type_name)
        out.write('0')
        out.deindent()
        out.write(');')

        return out
예제 #25
0
    def gen_function(self, kernel_name, kernel_arg_type_name, code=None):
        """Write the C kernel function that unpacks ``Args`` and runs the kernel.

        The dimension that is split across cores depends on
        ``self.kernel_dims``: ``Sz`` (declared as ``W * H``) for 1, ``H`` for
        2 and ``InFeatures`` for 3. The loop nest itself is delegated to
        ``self._func_col.create_kernel``.

        Args:
            kernel_name: Name of the emitted C function.
            kernel_arg_type_name: C type name of the ``Args`` pointer.
            code: Block to write into. A new ``CodeBlock`` is created when
                this is ``None``.

        Returns:
            The code block that was written to.

        Raises:
            ValueError: If ``self.kernel_dims`` is not 1, 2 or 3. The function
                header and argument unpacking have already been written to
                the block by then.
        """
        out = CodeBlock() if code is None else code

        out.comment("Output iteration space reduced to %s iteration spaces" % (self.kernel_dims))
        out.write("void {}({} *Args) {{", kernel_name, kernel_arg_type_name)
        out.indent()
        for arg_name, arg_type in self.kernel_args:
            out.write('{0} {1} = Args->{1};', arg_type, arg_name)

        split_extent = {1: "Sz", 2: "H", 3: "InFeatures"}.get(self.kernel_dims)
        if split_extent is None:
            raise ValueError("expression has too many dimensions")
        if split_extent == "Sz":
            # The flattened size is not a kernel argument; declare it here.
            out.write('unsigned int Sz = W * H;')

        out.write('unsigned int CoreId = gap_coreid();')
        out.write('unsigned int Chunk = ChunkSize({});', split_extent)
        out.write('unsigned int First = Chunk*CoreId;')
        out.write('unsigned int Last = gap_min(First+Chunk, {});', split_extent)
        self._func_col.create_kernel(out)
        out.write('gap_waitbarrier(0);')
        out.deindent()
        out.write('}}')
        return out