def gen_function(self, kernel_name, kernel_arg_type_name, code=None):
    """Write the C kernel entry function into a code block.

    The emitted function receives a pointer to the kernel argument struct,
    copies every entry of ``self.kernel_args`` into a local variable, derives
    the per-core ``First``/``Last`` bounds from ``gap_coreid()`` and
    ``ChunkSize(...)``, lets ``self._func_col.create_kernel`` write the body
    and finishes with ``gap_waitbarrier(0)``.

    Args:
        kernel_name: Name of the emitted C function.
        kernel_arg_type_name: Name of the C argument struct type.
        code: Code block to append to; a new ``CodeBlock`` is created when
            it is None.

    Returns:
        The code block that was written to.

    Raises:
        ValueError: If ``self.kernel_dims`` is not equal to 1, 2 or 3. The
            function header and argument locals have already been written
            to the block when this is raised.
    """
    out = CodeBlock() if code is None else code
    dims = self.kernel_dims

    out.comment("Output iteration space reduced to %s iteration spaces" % (dims))
    out.write("void {}({} *Args) {{", kernel_name, kernel_arg_type_name)
    out.indent()
    for arg_name, arg_type in self.kernel_args:
        out.write('{0} {1} = Args->{1};', arg_type, arg_name)

    # Name of the C variable that bounds the range split across the cores.
    for dim_count, bound_name in ((1, "Sz"), (2, "H"), (3, "InFeatures")):
        if dims == dim_count:
            split_bound = bound_name
            break
    else:
        raise ValueError("expression has too many dimensions")

    if split_bound == "Sz":
        # With a single dimension the bound is a local declared here.
        out.write('unsigned int Sz = W * H;')

    out.write('unsigned int CoreId = gap_coreid();')
    out.write('unsigned int Chunk = ChunkSize({});', split_bound)
    out.write('unsigned int First = Chunk*CoreId;')
    out.write('unsigned int Last = gap_min(First+Chunk, {});', split_bound)
    self._func_col.create_kernel(out)
    out.write('gap_waitbarrier(0);')
    out.deindent()
    out.write('}}')
    return out
def create_kernel(self, code_block=None):
    """Write the nested C ``for`` loops of the kernel and the code inside them.

    One loop is opened per kernel dimension; ``self.produce_functions`` is
    called after each loop is opened and again before each loop is closed.
    When ``self.kernel_dims`` is falsy no loop is written and
    ``produce_functions`` is called once with depth 0.

    Args:
        code_block: Code block to append to; a new ``CodeBlock`` is created
            when it is None.

    Returns:
        The code block that was written to.
    """
    if code_block is None:
        code_block = CodeBlock()
    execution_order = self.execution_order
    # For every entry of self._freevars, gather the indexes (taken from
    # self._variable_indexes) of all the variables listed for it.
    index_dependencies = {
        var_name: set([
            index for ivar in ivars for index in self._variable_indexes[ivar]
        ]) for var_name, ivars in self._freevars.items()
    }
    # Add each variable's own indexes to its set.
    for var_name, indexes in index_dependencies.items():
        indexes.update(set(self._variable_indexes[var_name]))
    # Reduce each set to its largest index, or 0 when the set is empty.
    # presumably this is the deepest loop level the variable needs - TODO confirm
    index_dependencies = {
        var_name: max(indexes) if indexes else 0
        for var_name, indexes in index_dependencies.items()
    }
    # Running position threaded through every produce_functions call.
    produced_idx = 0
    if self.kernel_dims:
        for depth in range(self.kernel_dims):
            if depth == 0:
                # Outermost loop runs over the First..Last range.
                code_block.write(
                    'for ({0}=First; {1}<Last; {1}++) {{',
                    self.iterators[depth].c_expr(declare=True),
                    self.iterators[depth].c_expr())
            else:
                if depth == 1:
                    # Second loop is bounded by W for two dimensions, else by H.
                    code_block.write(
                        'for ({0}=0; {1}<{2}; {1}++) {{',
                        self.iterators[depth].c_expr(declare=True),
                        self.iterators[depth].c_expr(),
                        "W" if self.kernel_dims == 2 else "H")
                elif depth == 2:
                    # Third loop is always bounded by W.
                    code_block.write(
                        'for ({0}=0; {1}<W; {1}++) {{',
                        self.iterators[depth].c_expr(declare=True),
                        self.iterators[depth].c_expr())
                # NOTE(review): no loop header is written for depth > 2, yet
                # the indent below and the closing brace still are.
            code_block.indent()
            produced_idx = self.produce_functions(produced_idx, execution_order, index_dependencies, depth, code_block)
        # Close the loops from the innermost outwards.
        for depth in reversed(range(self.kernel_dims)):
            produced_idx = self.produce_functions(produced_idx, execution_order, index_dependencies, depth, code_block)
            code_block.deindent()
            code_block.write('}}')
    else:
        produced_idx = self.produce_functions(produced_idx, execution_order, index_dependencies, 0, code_block)
    # Empty line after the kernel body.
    # NOTE(review): confirm this is meant to be written for both branches.
    code_block.write('')
    # presumably checks that every entry of execution_order was handled - TODO confirm
    assert produced_idx >= len(execution_order)
    return code_block
def kernel_arg_type_codegen(self, type_name, code=None):
    """Write the C ``typedef struct`` that carries the kernel arguments.

    One member is written per ``(name, type)`` pair in ``self.kernel_args``.

    Args:
        type_name: Name given to the typedef.
        code: Code block to append to; a new ``CodeBlock`` is created when
            it is None.

    Returns:
        The code block that was written to.
    """
    out = CodeBlock() if code is None else code

    out.write('typedef struct {')
    out.indent()
    for member in self.kernel_args:
        member_name, member_type = member
        # C declaration order: type first, then the member name.
        out.write('{} {};', member_type, member_name)
    out.deindent()
    out.write('}} {};', type_name)

    return out
def gen_kernel_model(self, kernel_name, kernel_arg_type_name, code=None):
    """Write the ``LibKernelTemplate(...)`` and ``LibKernel(...)`` calls.

    The template call lists one ``TCArg`` per ``(name, type)`` pair in
    ``self.kernel_args``; the kernel call names the kernel and its argument
    struct type.

    Args:
        kernel_name: Kernel name written into ``LibKernel``.
        kernel_arg_type_name: Argument struct type name written into both
            calls.
        code: Code block to append to; a new ``CodeBlock`` is created when
            it is None.

    Returns:
        The code block that was written to.
    """
    out = CodeBlock() if code is None else code
    kernel_args = self.kernel_args
    arg_count = len(kernel_args)

    # Template describing the argument struct.
    out.write('LibKernelTemplate(')
    out.indent()
    out.write('"{}",', kernel_arg_type_name)
    out.write('CArgs({},', arg_count)
    out.indent()
    final_position = arg_count - 1
    for position, (arg_name, arg_type) in enumerate(kernel_args):
        # Every entry except the final one is followed by a comma.
        separator = '' if position >= final_position else ","
        out.write('TCArg("{}", "{}"){}', arg_type, arg_name, separator)
    out.deindent()
    out.write(')')
    out.deindent()
    out.write(');')

    # Kernel declaration referring to that template.
    out.write('LibKernel(')
    out.indent()
    out.write('"{}",', kernel_name)
    out.write('CALL_PARALLEL,')
    out.write('0,')
    out.write('"{}",', kernel_arg_type_name)
    out.write('0')
    out.deindent()
    out.write(');')
    return out
def gen_function(self, kernel_name, kernel_arg_type_name, code=None):
    """Write the C kernel entry function into a code block.

    The emitted function receives a pointer to the kernel argument struct,
    copies every entry of ``self.kernel_args`` into a local variable, derives
    the per-core ``First``/``Last`` bounds, lets
    ``self._func_col.create_kernel`` write the body and finishes with
    ``gap_waitbarrier(0)``.

    Args:
        kernel_name: Name of the emitted C function.
        kernel_arg_type_name: Name of the C argument struct type.
        code: Code block to append to; a new ``CodeBlock`` is created when
            it is None.

    Returns:
        The code block that was written to.
    """
    out = CodeBlock() if code is None else code

    out.comment("Output iteration space reduced to %s iteration spaces" % (self.kernel_dims))
    signature = f"void {kernel_name}({kernel_arg_type_name} *Args) {{"
    out.write(signature)
    out.indent()
    for arg_name, arg_type in self.kernel_args:
        out.write('{0} {1} = Args->{1};', arg_type, arg_name)

    # Work is split across cores along the parallel iterator; its upper-cased
    # name is used as the C expression bounding that range.
    split_bound = self.parallel_iterator.name.upper()
    chunking = (
        ('unsigned int CoreId = gap_coreid();', ()),
        ('unsigned int Chunk = ChunkSize({});', (split_bound,)),
        ('unsigned int First = Chunk*CoreId;', ()),
        ('unsigned int Last = gap_min(First+Chunk, {});', (split_bound,)),
    )
    for statement, params in chunking:
        out.write(statement, *params)

    self._func_col.create_kernel(self.parallel_iterator, self.fixed_iterators, out)
    out.write('gap_waitbarrier(0);')
    out.deindent()
    out.write('}')
    return out
def c_block(self, code_block: CodeBlock = None, iteration_space: IterationSpace = None, with_loops=False):
    """Write this node's preconditions and contents as C code.

    When ``with_loops`` is true, the output is wrapped in one ``for`` loop
    per axis of ``iteration_space.axis_shape`` whose index is not in
    ``iteration_space.fixed_spaces``.

    Args:
        code_block: Code block to append to; a new ``CodeBlock`` is created
            when it is None.
        iteration_space: Iteration space passed on to every child; it must
            be truthy when ``with_loops`` is set.
        with_loops: Whether to wrap the output in the loops described above.

    Returns:
        The code block that was written to.
    """
    out = CodeBlock() if code_block is None else code_block

    def looped_axes():
        # Lazily yield the axis indexes that get a loop of their own.
        for axis, _ in enumerate(iteration_space.axis_shape):
            if axis not in iteration_space.fixed_spaces:
                yield axis

    if with_loops:
        assert iteration_space, "must have space"
        for axis in looped_axes():
            loop_header = f"{iteration_space.c_for(axis)} {{"
            out.write(loop_header)
            out.indent()

    # Preconditions are written before the contents.
    for child_group in (self.preconditions, self.contents):
        for child in child_group:
            child.c_block(code_block=out, iteration_space=iteration_space)

    if with_loops:
        # One closing brace for every loop opened above.
        for _axis in looped_axes():
            out.deindent()
            out.write("}")

    return out
def gen_user_kernel(self, ukname: str, kname: str, code: CodeBlock = None):
    """Write a C function that builds the user kernel with ``UserKernel(...)``.

    The emitted function is ``int <ukname>(char *Name)``. Its ``UserKernel``
    call lists the iteration space, the C arguments, a single call to
    ``kname`` with its bindings, and the kernel arguments. The kernel infos
    from ``self.gen_kinfos()`` are written inside an ``if (Kernel)`` block.

    Args:
        ukname: Name of the emitted C function.
        kname: Name of the kernel called inside ``Calls``.
        code: Code block to append to; a new ``CodeBlock`` is created when
            it is None.

    Returns:
        The code block that was written to.

    Raises:
        IndexError: If the C argument, binding or kernel argument list is
            empty.
    """
    if code is None:
        code = CodeBlock()

    def write_list(items):
        # One entry per line; every entry but the last is followed by a comma.
        for item in items[:-1]:
            code.write("{0},", item)
        code.write("{0}", items[-1])

    code.write("int {0}(char *Name) {{", ukname)
    code.indent()
    code.write("Kernel_T *Kernel = UserKernel(")
    code.indent()
    code.write("Name,")
    code.write("{0},", self.gen_iterspace())
    # The generator methods are called in the same order as before, in case
    # they depend on each other's side effects.
    kargs = self.gen_kargs()
    code.write("TILE_HOR,")
    cargs = self.gen_cargs()
    code.write("CArgs({0},", len(cargs))
    code.indent()
    write_list(cargs)
    code.deindent()
    code.write("),")
    code.write("Calls(1,")
    code.indent()
    code.write("Call(\"{0}\", LOC_D{1},", kname, len(self.func_col.iterators) - 1)
    code.indent()
    bindings = self.gen_kerbingings()
    code.write("Bindings({0},", len(bindings))
    code.indent()
    write_list(bindings)
    code.deindent()
    code.write(")")
    code.deindent()
    code.write(")")
    code.deindent()
    code.write("),")
    # The count must match the entries listed below, which come from kargs
    # (it was previously taken from cargs).
    code.write("KerArgs({0},", len(kargs))
    code.indent()
    write_list(kargs)
    code.deindent()
    code.write(")")
    code.deindent()
    code.write(");")
    code.write("if (Kernel) {")
    code.indent()
    for kinfo in self.gen_kinfos():
        code.write("{0};", kinfo)
    code.deindent()
    code.write("}")
    code.write("return (Kernel!=0);")
    code.deindent()
    code.write("}")
    return code
def gen_user_kernel(self, block: NNToolCodeBlock, ukname: str, kname: str):
    """Write a C function that builds the user kernel with ``UserKernel(...)``.

    The emitted function is ``int <ukname>(char *Name)``. Its ``UserKernel``
    call lists the iteration space, the C arguments, a single ``LOC_LOOP``
    call to ``kname`` with its bindings, and the kernel arguments. The
    kernel infos from ``self.gen_kinfos()`` are written inside an
    ``if (Kernel)`` block.

    Args:
        block: Code block to append to.
        ukname: Name of the emitted C function.
        kname: Name of the kernel called inside ``Calls``.

    Raises:
        IndexError: If the C argument, binding or kernel argument list is
            empty.
    """

    def write_list(items):
        # One entry per line; every entry but the last is followed by a comma.
        for item in items[:-1]:
            block.write("{0},", item)
        block.write("{0}", items[-1])

    block.write("int {0}(char *Name) {{", ukname)
    block.indent()
    block.write("Kernel_T *Kernel = UserKernel(")
    block.indent()
    block.write("Name,")
    block.write("{0},", self.gen_iterspace())
    block.write("TILE_HOR,")
    cargs = self.gen_cargs()
    block.write("CArgs({0},", len(cargs))
    block.indent()
    write_list(cargs)
    block.deindent()
    block.write("),")
    block.write("Calls(1,")
    block.indent()
    block.write("Call(\"{0}\", LOC_LOOP,", kname)
    block.indent()
    bindings = self.gen_kerbingings()
    block.write("Bindings({0},", len(bindings))
    block.indent()
    write_list(bindings)
    block.deindent()
    block.write(")")
    block.deindent()
    block.write(")")
    block.deindent()
    block.write("),")
    kargs = self.gen_kargs()
    # The count must match the entries listed below, which come from kargs
    # (it was previously taken from cargs).
    block.write("KerArgs({0},", len(kargs))
    block.indent()
    write_list(kargs)
    block.deindent()
    block.write(")")
    block.deindent()
    block.write(");")
    block.write("if (Kernel) {{")
    block.indent()
    for kinfo in self.gen_kinfos():
        block.write("{0};", kinfo)
    block.deindent()
    block.write("}}")
    block.write("return (Kernel!=0);")
    block.deindent()
    block.write("}}")