def code(self, code_block=None):
    """Emit the ``gen_mat_mul_sq8`` generator call for this node.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    # An unmodified control block is not declared; the call then receives
    # the string "0" in place of a control block name.
    if self.gen_ctrl.is_unmodified:
        ctrl_ref = "0"
    else:
        self.gen_ctrl.gen_ctrl_decl(block)
        ctrl_ref = self.gen_ctrl.ctrl_name

    gen_mat_mul_sq8(
        block,
        self.cname,
        ctrl_ref,
        self.colM1,
        self.lineM1,
        self.colM2,
        self.lineM2,
        self.at_act_params,
    )
    return block
def code(self, code_block=None):
    """Emit the ``gen_mat_vect_mul_sq8`` generator call for this node.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    # Default to "0" and only switch to a named control block when the
    # control settings were changed and therefore need a declaration.
    ctrl_ref = "0"
    if not self.gen_ctrl.is_unmodified:
        self.gen_ctrl.gen_ctrl_decl(block)
        ctrl_ref = self.gen_ctrl.ctrl_name

    gen_mat_vect_mul_sq8(
        block,
        self.cname,
        ctrl_ref,
        self.feat_dim,
        self.width,
        self.height,
        self.at_act_params.ReLUOper,
    )
    return block
def code(self, code_block=None):
    """Emit the ``gen_at_softmax`` generator call for this node.

    The control block declaration is written when the control settings were
    modified, but no control reference is passed on to ``gen_at_softmax``.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    ctrl = self.gen_ctrl
    if not ctrl.is_unmodified:
        ctrl.gen_ctrl_decl(block)

    gen_at_softmax(
        block,
        self.cname,
        self.in_q,
        self.out_q,
        self.in_dim,
        self.at_softmax_params,
        at_ver=self.at_ver,
    )
    return block
def code(self, code_block=None):
    """Emit the ``gen_cnn_globalpool_fp16`` generator call for this node.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    # "0" stands in for the control block when nothing was modified.
    if self.gen_ctrl.is_unmodified:
        ctrl_ref = "0"
    else:
        self.gen_ctrl.gen_ctrl_decl(block)
        ctrl_ref = self.gen_ctrl.ctrl_name

    gen_cnn_globalpool_fp16(
        block,
        self.cname,
        ctrl_ref,
        self.c,
        self.h,
        self.w,
        self.at_globalpool_params.GlobalPoolOper,
        self.at_act_params.ReLUOper,
    )
    return block
def code(self, code_block=None):
    """Emit the ``gen_rnn_sq8`` generator call for this node.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    ctrl_ref = "0"
    if not self.gen_ctrl.is_unmodified:
        self.gen_ctrl.gen_ctrl_decl(block)
        ctrl_ref = self.gen_ctrl.ctrl_name

    gen_rnn_sq8(
        block,
        self.kname,
        self.cname,
        ctrl_ref,
        self.n_cells,
        self.n_input_cells,
        self.n_output_cells,
        self.n_states,
        self.n_inputs,
        # the generator receives the revert flag as the text "1" or "0"
        "1" if self.revert else "0",
    )
    return block
def code(self, code_block=None):
    """Emit the ``gen_at_linear_relu`` generator call for this node.

    The bias and multiplicative-bias quantisation widths are passed to the
    generator as ``bits // 8``.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    # Unmodified control settings need no declaration and are passed as "0".
    if self.gen_ctrl.is_unmodified:
        ctrl_ref = "0"
    else:
        self.gen_ctrl.gen_ctrl_decl(block)
        ctrl_ref = self.gen_ctrl.ctrl_name

    gen_at_linear_relu(
        block,
        self.cname,
        self.bias_q.bits // 8,
        self.mulbiases_q.bits // 8,
        self.in_dim,
        self.out_dim,
        self.at_linear_params.LinearOper,
        self.at_act_params.ReLUOper,
        at_ver=self.at_ver,
        gen_ctrl=ctrl_ref,
    )
    return block
def code(self, code_block=None):
    """Emit the ``gen_at_matrixadddyn`` generator call for this node.

    Unlike generators that take a control block name, this one is handed the
    ``gen_ctrl`` object itself.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    ctrl = self.gen_ctrl
    if not ctrl.is_unmodified:
        ctrl.gen_ctrl_decl(block)

    gen_at_matrixadddyn(
        block,
        self.cname,
        self.in_q1,
        self.in_q2,
        self.out_q,
        self.in_dim,
        self.out_dim,
        self.at_matrixadd_params,
        gen_ctrl=self.gen_ctrl,
    )
    return block
def code(self, code_block=None):
    """Write a one-line generator call of the form ``<gen_name>("<cname>");``.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    # The call text is fully formatted here, so write() receives no arguments.
    call_text = '{}("{}");'.format(self.gen_name, self.cname)
    block.write(call_text)
    return block
def code(self, code_block=None):
    """Write the ``CNN_SignedUnsigned`` generator call for this node.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    # The statement is fully formatted here, so write() receives no arguments.
    statement = f'CNN_SignedUnsigned("{self.cname}", {self.in_type}, {self.out_type}, {self.size});'
    block.write(statement)
    return block
def code(self, code_block=None):
    """Emit the ``gen_at_linear_relu`` generator call for this node.

    This variant forwards the quantisation records and parameter objects
    as-is and hands the ``gen_ctrl`` object itself to the generator.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    ctrl = self.gen_ctrl
    if not ctrl.is_unmodified:
        ctrl.gen_ctrl_decl(block)

    gen_at_linear_relu(
        block,
        self.cname,
        self.in_q,
        self.out_q,
        self.filter_q,
        self.bias_q,
        self.in_dim,
        self.out_dim,
        self.at_linear_params,
        self.at_act_params,
        at_ver=self.at_ver,
        gen_ctrl=self.gen_ctrl,
    )
    return block
def c_block(self, code_block: CodeBlock = None, iteration_space: IterationSpace = None):
    """Write a C assignment of the first content expression to ``self.var``.

    Three forms are produced:

    * no iteration space: ``<var name> = <expr>;``
    * variable listed in ``iteration_space.temporary_variables``: the
      variable is rendered with ``declare=True`` and assigned.
    * otherwise: the variable is followed by
      ``iteration_space.c_index(<var name>)`` and assigned.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.
        iteration_space: Optional iteration space used to render both sides
            of the assignment.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block

    if not iteration_space:
        block.write(f'{self.var.name} = {self.contents[0].c_expr()};')
        return block

    if self.var.name in iteration_space.temporary_variables:
        target = f"{self.var.c_expr(declare=True, dtype=self.contents[0].dtype)}"
    else:
        target = f"{self.var.c_expr(dtype=self.contents[0].dtype)}{iteration_space.c_index(self.var.name)}"

    # The left-hand side is rendered before the right-hand side.
    value = f" = {self.contents[0].c_expr(iteration_space=iteration_space)};"
    block.write(target + value)
    return block
def c_block(self, code_block: CodeBlock = None, iteration_space: IterationSpace = None, with_loops=False):
    """Write the C for all preconditions and contents, optionally in loops.

    With ``with_loops`` set, one ``for`` header is opened for every axis of
    ``iteration_space.axis_shape`` that is not in
    ``iteration_space.fixed_spaces``, and the same number of closing braces
    is written after the body.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.
        iteration_space: Iteration space handed to every child; required
            when ``with_loops`` is true.
        with_loops: Whether to wrap the body in the iteration-space loops.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block

    def looped_axes():
        # Axes held in fixed_spaces get no loop of their own.
        return (
            axis
            for axis, _ in enumerate(iteration_space.axis_shape)
            if axis not in iteration_space.fixed_spaces
        )

    if with_loops:
        assert iteration_space, "must have space"
        for axis in looped_axes():
            block.write(f"{iteration_space.c_for(axis)} {{")
            block.indent()

    for precondition in self.preconditions:
        precondition.c_block(code_block=block, iteration_space=iteration_space)
    for statement in self.contents:
        statement.c_block(code_block=block, iteration_space=iteration_space)

    if with_loops:
        # One closing brace per loop opened above.
        for _ in looped_axes():
            block.deindent()
            block.write("}")

    return block
def code(self, code_block=None):
    """Emit the ``gen_at_3d_transpose`` generator call for this node.

    Two comments are written first: the node name, and the input/output
    dimensions together with ``self.real_transpose``.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)
    block.comment(
        "transpose from {} to {} ({})",
        self.in_dim,
        self.out_dim,
        self.real_transpose,
    )

    # The control block is declared when modified, but the transpose
    # generator itself is not given a control reference.
    ctrl = self.gen_ctrl
    if not ctrl.is_unmodified:
        ctrl.gen_ctrl_decl(block)

    gen_at_3d_transpose(block, self.cname, self.in_shape, self.permop)
    return block
def code(self, code_block=None):
    """Emit the ``gen_at_2d_transpose`` generator call for this node.

    Two comments are written first: the node name, and the input/output
    dimensions together with ``self.real_transpose``.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)
    block.comment(
        "transpose from {} to {} ({})",
        self.in_dim,
        self.out_dim,
        self.real_transpose,
    )

    # Unmodified control settings need no declaration and are passed as "0".
    if self.gen_ctrl.is_unmodified:
        ctrl_ref = "0"
    else:
        self.gen_ctrl.gen_ctrl_decl(block)
        ctrl_ref = self.gen_ctrl.ctrl_name

    # at_bits() may be negative, hence the abs().
    # NOTE(review): sign convention of at_bits is not visible here - confirm.
    gen_at_2d_transpose(
        block,
        self.cname,
        abs(at_bits(self.in_q)),
        self.in_shape,
        gen_ctrl=ctrl_ref,
    )
    return block
def __init__(self, node_name, cname, pool_params, pool_q, act_params, act_q, code_block=None, at_ver=3,
             gen_ctrl=None):
    """Collect the parameters needed to generate a pool (+ activation) kernel.

    Args:
        node_name: Name of the graph node, stored for later use.
        cname: C name of the kernel; also set on the control block.
        pool_params: Pooling parameters, or ``None`` when there is no pool.
        pool_q: Quantisation record for the pool; its first input/output
            entries are used.
        act_params: Activation parameters, or ``None`` for no activation.
        act_q: Quantisation record for the activation.
        code_block: Accepted for interface compatibility; not used here.
        at_ver: Generator version. Versions below 3 configure ``relu6``
            through the control block, later versions configure ``relun``.
        gen_ctrl: Existing control block to reuse. A new ``GenCtrl`` is
            created when this is ``None``.

    Raises:
        NotImplementedError: If there is no pooling operation, or if the
            input and output bit widths differ.
    """
    if gen_ctrl is None:
        self.gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname
        self.gen_ctrl = gen_ctrl

    in_q = out_q = None
    in_dim = out_dim = None
    pad_compatibilities = []

    if pool_params is not None:
        at_pool_params = gen_pool_at_params(pool_params, pad_compatibilities)
        if in_dim is None:
            in_dim = pool_params.in_dims[0]
        out_dim = pool_params.out_dims[0]
        if in_q is None:
            in_q = pool_q.in_qs[0]
        out_q = pool_q.out_qs[0]
    else:
        at_pool_params = NO_POOL

    if act_params is not None:
        at_act_params = gen_active_at_params(act_params)
        # The activation only supplies input details when the pool did not;
        # its output quantisation always wins because it runs last.
        if in_dim is None:
            in_dim = act_params.in_dims[0]
        if out_dim is None:
            out_dim = act_params.out_dims[0]
        if in_q is None:
            in_q = act_q.in_qs[0]
        out_q = act_q.out_qs[0]
        if at_ver < 3:
            if act_params.activation == "relu6" and out_q.q != 0:
                self.gen_ctrl.ReluN = 6 << out_q.q
                self.gen_ctrl.ReluNNoNorm = 1
        else:
            if act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params
    else:
        at_act_params = NO_ACTIVATION

    if pad_compatibilities:
        reduction = PadDim.pad_compatibility_reduce(*pad_compatibilities,
                                                    "convolution padding is not compatible with pool padding")
        if not reduction[2]:  # default is balanced pad left
            at_pad_ctrl = next(i for i, v in enumerate(reduction) if v)
            self.gen_ctrl.PadType = at_pad_ctrl

    # Check for a missing pool first: with neither pool nor activation the
    # quantisation records are still None, and comparing their bit widths
    # would fail with AttributeError instead of the intended error.
    if at_pool_params == NO_POOL:
        raise NotImplementedError(
            "activation layer on its own should not be matched by this kernel")
    if in_q.bits != out_q.bits:
        raise NotImplementedError("only homogenious operations are supported at present")

    self.at_pool_params = at_pool_params
    self.in_dim = in_dim
    self.out_dim = out_dim
    self.in_q = in_q
    self.out_q = out_q
    self.at_act_params = at_act_params
    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
def gen_user_kernel(self, ukname: str, kname: str, code: CodeBlock = None):
    """Write the C function that builds the AutoTiler ``UserKernel`` model.

    The generated function is ``int <ukname>(char *Name)``. It contains a
    ``UserKernel(...)`` call listing the C arguments, one call to ``kname``
    with its bindings, and the kernel arguments, followed by the kernel
    info statements guarded by ``if (Kernel)``.

    Args:
        ukname: Name of the generated C function.
        kname: Name of the kernel function referenced in ``Call(...)``.
        code: Block to append to. A new ``CodeBlock`` is created when this
            is ``None``.

    Returns:
        The code block that was written to.
    """
    if code is None:
        code = CodeBlock()

    def write_list(entries):
        # Every entry except the last carries a trailing comma.
        for entry in entries[:-1]:
            code.write("{0},", entry)
        code.write("{0}", entries[-1])

    code.write("int {0}(char *Name) {{", ukname)
    code.indent()
    code.write("Kernel_T *Kernel = UserKernel(")
    code.indent()
    code.write("Name,")
    code.write("{0},", self.gen_iterspace())
    kargs = self.gen_kargs()
    code.write("TILE_HOR,")

    cargs = self.gen_cargs()
    code.write("CArgs({0},", len(cargs))
    code.indent()
    write_list(cargs)
    code.deindent()
    code.write("),")

    code.write("Calls(1,")
    code.indent()
    code.write("Call(\"{0}\", LOC_D{1},", kname, len(self.func_col.iterators) - 1)
    code.indent()
    bindings = self.gen_kerbingings()
    code.write("Bindings({0},", len(bindings))
    code.indent()
    write_list(bindings)
    code.deindent()
    code.write(")")
    code.deindent()
    code.write(")")
    code.deindent()
    code.write("),")

    # The declared count must match the entries written below, so it is
    # taken from kargs rather than from the C argument list.
    code.write("KerArgs({0},", len(kargs))
    code.indent()
    write_list(kargs)
    code.deindent()
    code.write(")")
    code.deindent()
    code.write(");")

    code.write("if (Kernel) {")
    code.indent()
    for kinfo in self.gen_kinfos():
        code.write("{0};", kinfo)
    code.deindent()
    code.write("}")
    code.write("return (Kernel!=0);")
    code.deindent()
    code.write("}")
    return code
def gen_kernel_headers(self, code: CodeBlock = None):
    """Write one ``#include`` line per entry of the function collection's header set.

    Args:
        code: Block to append to. A new ``CodeBlock`` is created when this
            is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code is None else code
    headers = self._func_col.c_header_set
    for header in headers:
        # Entries are written verbatim after "#include ".
        block.write('#include {}', header)
    return block
def gen_function(self, kernel_name, kernel_arg_type_name, code=None):
    """Write the C kernel function ``void <kernel_name>(<arg type> *Args)``.

    The generated body unpacks every kernel argument from ``Args``,
    computes this core's ``First``/``Last`` chunk bounds from the upper-cased
    name of the parallel iterator, lets the function collection write the
    kernel loops, and ends with ``gap_waitbarrier(0);``.

    Args:
        kernel_name: Name of the generated C function.
        kernel_arg_type_name: Name of the C argument struct type.
        code: Block to append to. A new ``CodeBlock`` is created when this
            is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code is None else code

    block.comment("Output iteration space reduced to %s iteration spaces" % (self.kernel_dims))
    block.write(f"void {kernel_name}({kernel_arg_type_name} *Args) {{")
    block.indent()

    # Copy each struct member into a local of the same name.
    for arg_name, arg_type in self.kernel_args:
        block.write('{0} {1} = Args->{1};', arg_type, arg_name)

    # Parallelize on the dimension of the parallel iterator.
    split_dim = self.parallel_iterator.name.upper()
    block.write('unsigned int CoreId = gap_coreid();')
    block.write('unsigned int Chunk = ChunkSize({});', split_dim)
    block.write('unsigned int First = Chunk*CoreId;')
    block.write('unsigned int Last = gap_min(First+Chunk, {});', split_dim)

    self._func_col.create_kernel(self.parallel_iterator, self.fixed_iterators, block)

    block.write('gap_waitbarrier(0);')
    block.deindent()
    block.write('}')
    return block
def code(self, code_block=None):
    """Emit the generator call matching this node's conv/pool/activation mix.

    Dispatch:

    * no convolution: ``gen_at_pool_relu`` (input and output bit widths
      must match).
    * ``ConvATParam``: ``gen_at_mulconv_pool_relu`` when ``mul_biases_q`` is
      set, otherwise ``gen_at_conv_pool_relu``.
    * ``GroupedConvATParam``: ``gen_at_grouped_mulconv_pool_relu`` when
      ``mul_biases_q`` is set, otherwise ``gen_at_grouped_conv_pool_relu``.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.

    Raises:
        NotImplementedError: For a pool-only node whose input and output
            bit widths differ.
        ValueError: If the convolution parameters are of an unknown type.
    """
    if code_block is None:
        code_block = CodeBlock()

    code_block.comment("generator for {}", self.node_name)

    if not self.gen_ctrl.is_unmodified:
        self.gen_ctrl.gen_ctrl_decl(code_block)

    if self.at_conv_params == NO_CONV:
        if self.in_q.bits != self.out_q.bits:
            raise NotImplementedError(
                "only homogenious operations are supported at present")
        LOG.debug("%s: pool relu inq %s outq %s control block",
                  self.node_name, self.in_q, self.out_q)
        gen_at_pool_relu(code_block, self.cname, self.in_q, self.out_q,
                         self.in_dim, self.out_dim, self.at_pool_params,
                         self.at_act_params, gen_ctrl=self.gen_ctrl,
                         at_ver=self.at_ver)
    elif isinstance(self.at_conv_params, ConvATParam):
        if self.mul_biases_q is not None:
            LOG.debug("%s: mulconv pool relu inq %s outq %s control block",
                      self.node_name, self.in_q, self.out_q)
            gen_at_mulconv_pool_relu(code_block, self.cname, self.in_q, self.out_q,
                                     self.filter_q, self.bias_q, self.mul_biases_q,
                                     self.in_dim, self.out_dim, self.at_conv_params,
                                     self.at_pool_params, self.at_act_params,
                                     gen_ctrl=self.gen_ctrl, at_ver=self.at_ver)
        else:
            LOG.debug("%s: conv pool relu inq %s outq %s control block",
                      self.node_name, self.in_q, self.out_q)
            gen_at_conv_pool_relu(code_block, self.cname, self.in_q, self.out_q,
                                  self.filter_q, self.bias_q,
                                  self.in_dim, self.out_dim, self.at_conv_params,
                                  self.at_pool_params, self.at_act_params,
                                  gen_ctrl=self.gen_ctrl, at_ver=self.at_ver)
    elif isinstance(self.at_conv_params, GroupedConvATParam):
        # Each log message names the generator that is actually called.
        if self.mul_biases_q is not None:
            LOG.debug("%s: grouped mulconv pool relu inq %s outq %s control block",
                      self.node_name, self.in_q, self.out_q)
            gen_at_grouped_mulconv_pool_relu(code_block, self.cname, self.in_q, self.out_q,
                                             self.filter_q, self.bias_q, self.mul_biases_q,
                                             self.in_dim, self.out_dim, self.at_conv_params,
                                             self.at_pool_params, self.at_act_params,
                                             gen_ctrl=self.gen_ctrl, at_ver=self.at_ver)
        else:
            LOG.debug("%s: grouped conv pool relu inq %s outq %s control block",
                      self.node_name, self.in_q, self.out_q)
            gen_at_grouped_conv_pool_relu(code_block, self.cname, self.in_q, self.out_q,
                                          self.filter_q, self.bias_q,
                                          self.in_dim, self.out_dim, self.at_conv_params,
                                          self.at_pool_params, self.at_act_params,
                                          gen_ctrl=self.gen_ctrl, at_ver=self.at_ver)
    else:
        raise ValueError('Internal error')

    return code_block
def gen_user_kernel(self, block: NNToolCodeBlock, ukname: str, kname: str):
    """Write the C function that builds the AutoTiler ``UserKernel`` model.

    The generated function is ``int <ukname>(char *Name)``. It contains a
    ``UserKernel(...)`` call listing the C arguments, one call to ``kname``
    with its bindings, and the kernel arguments, followed by the kernel
    info statements guarded by ``if (Kernel)``.

    Args:
        block: Code block that receives the generated lines.
        ukname: Name of the generated C function.
        kname: Name of the kernel function referenced in ``Call(...)``.
    """

    def write_list(entries):
        # Every entry except the last carries a trailing comma.
        for entry in entries[:-1]:
            block.write("{0},", entry)
        block.write("{0}", entries[-1])

    block.write("int {0}(char *Name) {{", ukname)
    block.indent()
    block.write("Kernel_T *Kernel = UserKernel(")
    block.indent()
    block.write("Name,")
    block.write("{0},", self.gen_iterspace())
    block.write("TILE_HOR,")

    cargs = self.gen_cargs()
    block.write("CArgs({0},", len(cargs))
    block.indent()
    write_list(cargs)
    block.deindent()
    block.write("),")

    block.write("Calls(1,")
    block.indent()
    block.write("Call(\"{0}\", LOC_LOOP,", kname)
    block.indent()
    bindings = self.gen_kerbingings()
    block.write("Bindings({0},", len(bindings))
    block.indent()
    write_list(bindings)
    block.deindent()
    block.write(")")
    block.deindent()
    block.write(")")
    block.deindent()
    block.write("),")

    kargs = self.gen_kargs()
    # The declared count must match the entries written below, so it is
    # taken from kargs rather than from the C argument list.
    block.write("KerArgs({0},", len(kargs))
    block.indent()
    write_list(kargs)
    block.deindent()
    block.write(")")
    block.deindent()
    block.write(");")

    # NOTE(review): the doubled braces below are passed without format
    # arguments; whether write() collapses them is not visible here - confirm.
    block.write("if (Kernel) {{")
    block.indent()
    for kinfo in self.gen_kinfos():
        block.write("{0};", kinfo)
    block.deindent()
    block.write("}}")
    block.write("return (Kernel!=0);")
    block.deindent()
    block.write("}}")
def create_kernel(self, code_block=None):
    """Write the nested C ``for`` loops of the kernel and the functions in them.

    For every variable in ``self._freevars`` the highest index used by the
    variable itself or by any of its free variables is computed (``0`` when
    there is none) and handed to ``produce_functions``. One loop is opened
    per kernel dimension; ``produce_functions`` is called after opening
    each loop and again before closing each loop.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.
    """
    out = CodeBlock() if code_block is None else code_block
    order = self.execution_order

    # Indexes reachable through each variable's free variables ...
    index_sets = {
        name: {idx for dep in deps for idx in self._variable_indexes[dep]}
        for name, deps in self._freevars.items()
    }
    # ... plus the indexes of the variable itself.
    for name, idx_set in index_sets.items():
        idx_set.update(set(self._variable_indexes[name]))
    index_dependencies = {
        name: max(idx_set) if idx_set else 0
        for name, idx_set in index_sets.items()
    }

    produced = 0
    if self.kernel_dims:
        for depth in range(self.kernel_dims):
            # The outermost loop runs over this core's First..Last chunk.
            if depth == 0:
                out.write(
                    'for ({0}=First; {1}<Last; {1}++) {{',
                    self.iterators[depth].c_expr(declare=True),
                    self.iterators[depth].c_expr())
            elif depth == 1:
                out.write(
                    'for ({0}=0; {1}<{2}; {1}++) {{',
                    self.iterators[depth].c_expr(declare=True),
                    self.iterators[depth].c_expr(),
                    "W" if self.kernel_dims == 2 else "H")
            elif depth == 2:
                out.write(
                    'for ({0}=0; {1}<W; {1}++) {{',
                    self.iterators[depth].c_expr(declare=True),
                    self.iterators[depth].c_expr())
            out.indent()
            produced = self.produce_functions(
                produced, order, index_dependencies, depth, out)

        for depth in reversed(range(self.kernel_dims)):
            produced = self.produce_functions(
                produced, order, index_dependencies, depth, out)
            out.deindent()
            # NOTE(review): doubled brace passed without format arguments;
            # whether write() collapses it is not visible here - confirm.
            out.write('}}')
    else:
        produced = self.produce_functions(
            produced, order, index_dependencies, 0, out)

    out.write('')
    assert produced >= len(order)
    return out
def code(self, code_block=None):
    """Emit the fp16 conv/pool/activation generator call for this node.

    Dispatch:

    * no convolution: ``gen_cnn_conv_pool_act_fp16`` with ``"KOP_NONE"`` and
      zeros in place of the convolution arguments.
    * ``ConvATParam``: ``gen_cnn_conv_pool_act_fp16``.
    * ``GroupedConvATParam``: ``gen_cnn_grp_conv_pool_act_fp16``.

    Args:
        code_block: Block to append to. A new ``CodeBlock`` is created when
            this is ``None``.

    Returns:
        The code block that was written to.

    Raises:
        ValueError: If the convolution parameters are of an unknown type.
    """
    block = CodeBlock() if code_block is None else code_block
    block.comment("generator for {}", self.node_name)

    # Unmodified control settings need no declaration and are passed as "0".
    if self.gen_ctrl.is_unmodified:
        ctrl_ref = "0"
    else:
        self.gen_ctrl.gen_ctrl_decl(block)
        ctrl_ref = self.gen_ctrl.ctrl_name

    def conv_args(conv):
        # Convolution operator, filter, dilation, stride and padding fields.
        return (conv.ConvOper, conv.Fcx, conv.Fcy, conv.Dcx, conv.Dcy,
                conv.Scx, conv.Scy, conv.ConvPad)

    def pool_act_args(pool, act):
        # Pool fields followed by the activation operator and control ref.
        return (pool.PoolOper, pool.Fpx, pool.Fpy, pool.Dpx, pool.Dpy,
                pool.Spx, pool.Spy, pool.PoolPad, act.ReLUOper, ctrl_ref)

    if self.at_conv_params == NO_CONV:
        pool = self.at_pool_params
        act = self.at_act_params
        gen_cnn_conv_pool_act_fp16(
            block, self.cname,
            self.in_dim.c, self.out_dim.c, self.in_dim.w, self.in_dim.h,
            self.bias_q.dtype_bits // 8 if self.bias_q is not None else 0,
            "KOP_NONE", 0, 0, 0, 0, 0, 0, 0,
            *pool_act_args(pool, act),
            at_ver=self.at_ver)
        return block

    conv = self.at_conv_params
    pool = self.at_pool_params
    act = self.at_act_params
    if isinstance(self.at_conv_params, ConvATParam):
        LOG.debug("%s: conv pool relu inq %s outq %s control block",
                  self.node_name, self.in_q, self.out_q)
        gen_cnn_conv_pool_act_fp16(
            block, self.cname,
            self.in_dim.c, self.out_dim.c, self.in_dim.w, self.in_dim.h,
            self.bias_q.dtype_bits // 8,
            *conv_args(conv),
            *pool_act_args(pool, act),
            at_ver=self.at_ver)
    elif isinstance(self.at_conv_params, GroupedConvATParam):
        LOG.debug(
            "%s: grouped mulconv pool relu inq %s outq %s control block",
            self.node_name, self.in_q, self.out_q)
        gen_cnn_grp_conv_pool_act_fp16(
            block, self.cname,
            conv.GroupIn, conv.GroupOut,
            self.in_dim.c, self.out_dim.c, self.in_dim.w, self.in_dim.h,
            self.bias_q.dtype_bits // 8,
            *conv_args(conv),
            *pool_act_args(pool, act),
            at_ver=self.at_ver)
    else:
        raise ValueError('Internal error')
    return block
def kernel_arg_type_codegen(self, type_name, code=None):
    """Write a C ``typedef struct`` with one member per kernel argument.

    Args:
        type_name: Name given to the typedef.
        code: Block to append to. A new ``CodeBlock`` is created when this
            is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code is None else code

    block.write('typedef struct {')
    block.indent()
    # kernel_args yields (name, type) pairs; C wants the type first.
    for member_name, member_type in self.kernel_args:
        block.write('{} {};', member_type, member_name)
    block.deindent()
    block.write('}} {};', type_name)
    return block
def gen_kernel_model(self, kernel_name, kernel_arg_type_name, code=None):
    """Write the ``LibKernelTemplate`` and ``LibKernel`` model declarations.

    The template lists one ``TCArg`` per kernel argument; the kernel entry
    references the template by ``kernel_arg_type_name`` and is declared
    ``CALL_PARALLEL``.

    Args:
        kernel_name: Name of the kernel function.
        kernel_arg_type_name: Name of the kernel argument struct type.
        code: Block to append to. A new ``CodeBlock`` is created when this
            is ``None``.

    Returns:
        The code block that was written to.
    """
    block = CodeBlock() if code is None else code

    block.write('LibKernelTemplate(')
    block.indent()
    block.write('"{}",', kernel_arg_type_name)
    block.write('CArgs({},', len(self.kernel_args))
    block.indent()
    for position, (arg_name, arg_type) in enumerate(self.kernel_args):
        # Every TCArg except the last is followed by a comma.
        is_last = position >= (len(self.kernel_args) - 1)
        separator = '' if is_last else ","
        block.write('TCArg("{}", "{}"){}', arg_type, arg_name, separator)
    block.deindent()
    block.write(')')
    block.deindent()
    block.write(');')

    block.write('LibKernel(')
    block.indent()
    block.write('"{}",', kernel_name)
    block.write('CALL_PARALLEL,')
    block.write('0,')
    block.write('"{}",', kernel_arg_type_name)
    block.write('0')
    block.deindent()
    block.write(');')
    return block
def gen_function(self, kernel_name, kernel_arg_type_name, code=None):
    """Write the C kernel function ``void <kernel_name>(<arg type> *Args)``.

    The generated body unpacks every kernel argument from ``Args``, computes
    this core's ``First``/``Last`` chunk bounds over a dimension chosen from
    ``self.kernel_dims`` (``Sz`` = ``W * H`` for 1, ``H`` for 2,
    ``InFeatures`` for 3), lets the function collection write the kernel
    loops, and ends with ``gap_waitbarrier(0);``.

    Args:
        kernel_name: Name of the generated C function.
        kernel_arg_type_name: Name of the C argument struct type.
        code: Block to append to. A new ``CodeBlock`` is created when this
            is ``None``.

    Returns:
        The code block that was written to.

    Raises:
        ValueError: If ``self.kernel_dims`` is not 1, 2 or 3.
    """
    block = CodeBlock() if code is None else code

    block.comment("Output iteration space reduced to %s iteration spaces" % (self.kernel_dims))
    block.write("void {}({} *Args) {{", kernel_name, kernel_arg_type_name)
    block.indent()

    # Copy each struct member into a local of the same name.
    for arg_name, arg_type in self.kernel_args:
        block.write('{0} {1} = Args->{1};', arg_type, arg_name)

    dims = self.kernel_dims
    split_dim = {1: "Sz", 2: "H", 3: "InFeatures"}.get(dims)
    if split_dim is None:
        raise ValueError("expression has too many dimensions")
    if dims == 1:
        # A single dimension iterates over the flattened W * H size.
        block.write('unsigned int Sz = W * H;')

    block.write('unsigned int CoreId = gap_coreid();')
    block.write('unsigned int Chunk = ChunkSize({});', split_dim)
    block.write('unsigned int First = Chunk*CoreId;')
    block.write('unsigned int Last = gap_min(First+Chunk, {});', split_dim)

    self._func_col.create_kernel(block)

    block.write('gap_waitbarrier(0);')
    block.deindent()
    # NOTE(review): doubled brace passed without format arguments; whether
    # write() collapses it is not visible here - confirm.
    block.write('}}')
    return block