def transform(self, py_ast, program_cfg):
    """Lower a Python AST into a single generated C file.

    The first top-level definition of the converted tree gets an extra
    ``retval`` parameter, its parameters are typed from ``arg_cfg``, and its
    statements are wrapped in one ``for (i = 0; i < length; i++)`` loop
    tagged with the ``ivdep`` pragma before the declaration-filling and
    Hwacha vectorization passes run.

    Args:
        py_ast: Python AST handed to ``PyBasicConversions``.
        program_cfg: Two-item sequence ``(arg_cfg, tune_cfg)``. Only
            ``arg_cfg`` is used; its entries are called to instantiate
            parameter types and read for ``_dtype_`` / ``_shape_``.

    Returns:
        A one-element list holding ``CFile("generated", ...)``.
    """
    # tune_cfg is unused, but the unpack still enforces a two-item config.
    arg_cfg, tune_cfg = program_cfg
    tree = PyBasicConversions().visit(py_ast)
    func = tree.body[0]

    # The output buffer is typed like the first argument.
    func.params.append(C.SymbolRef("retval", arg_cfg[0]()))

    # Annotate arguments. zip pairs parameters with arg_cfg positionally and
    # stops at the shorter of the two sequences.
    param_dict = {}
    for param, arg_type in zip(func.params, arg_cfg):
        param.type = arg_type()
        param_dict[param.name] = arg_type._dtype_

    # Total element count of the first argument drives the loop bound.
    length = np.prod(arg_cfg[0]._shape_)

    transformer = MapTransformer("i", param_dict, "retval")
    loop_body = [transformer.visit(stmt) for stmt in func.defn]
    index_init = C.Assign(C.SymbolRef("i", ct.c_int()), C.Constant(0))
    index_test = C.Lt(C.SymbolRef("i"), C.Constant(length))
    index_step = C.PostInc(C.SymbolRef("i"))
    func.defn = [
        C.For(index_init, index_test, index_step, body=loop_body, pragma="ivdep")
    ]

    tree = DeclarationFiller().visit(tree)

    # HwachaVectorize receives this list and may add extra definitions to it;
    # they are emitted ahead of the main tree.
    defns = []
    tree = HwachaVectorize(param_dict, defns).visit(tree)

    preamble = [
        StringTemplate(text)
        for text in (
            "#include <stdlib.h>",
            "#include <stdint.h>",
            "#include <assert.h>",
            "extern \"C\" void __hwacha_body(void);",
        )
    ]
    return [CFile("generated", preamble + defns + [tree])]
def gen_loop_nest(self, loopvars, cfg):
    """Build a perfectly nested chain of counted loops, one per loop variable.

    Loop ``k`` runs ``loopvars[k]`` from 0 up to (excluding) ``cfg.shape[k]``
    with a post-increment. Extra entries in the longer of ``loopvars`` /
    ``cfg.shape`` beyond the first are ignored because the two are zipped.

    Args:
        loopvars: Loop variable names, outermost first; must be non-empty and
            usable as a key of ``self.loop_shape_map``.
        cfg: Object exposing a non-empty ``shape`` sequence of loop bounds.

    Returns:
        ``(outermost_loop, innermost_loop)``; both are the same node when
        only one loop is generated. The innermost loop has an empty body.
    """

    def counted_loop(name, bound, stmts):
        # for (int name = 0; name < bound; name++) { stmts }
        return C.For(
            C.Assign(C.SymbolRef(name, ct.c_int()), C.Constant(0)),
            C.Lt(C.SymbolRef(name), C.Constant(bound)),
            C.PostInc(C.SymbolRef(name)),
            stmts)

    # `open_body` is always the list object handed to the most recently built
    # loop; nesting works by appending the next loop to that list.
    open_body = []
    outermost = counted_loop(loopvars[0], cfg.shape[0], open_body)
    innermost = outermost
    for name, bound in zip(loopvars[1:], cfg.shape[1:]):
        innermost = counted_loop(name, bound, [])
        open_body.append(innermost)
        open_body = innermost.body

    # Remember which shape this set of loop variables iterates over.
    self.loop_shape_map[loopvars] = cfg.shape
    return outermost, innermost
def block_loop(self, node):
    """Split ``node`` into a blocked outer loop plus a bounded inner loop.

    A new outer loop, whose variable name is the original name repeated twice
    (``i`` -> ``ii``), is inserted at the front of ``self.nest``. It covers the
    original range in steps of ``self.block_factor`` and has the placeholder
    body ``[None]``. ``node`` is rewritten in place to start at the outer
    variable and stop at ``fmin(outer + block_factor, original_bound)``.

    Args:
        node: A ``for`` loop node with ``init.left``, ``init.right`` and
            ``test.right`` attributes; mutated in place.

    Returns:
        None.
    """
    inner_name = node.init.left.name
    outer_name = inner_name + inner_name

    # Capture the original bounds before node is rewritten below.
    lower_bound = node.init.right
    upper_bound = node.test.right

    outer_init = C.Assign(
        C.SymbolRef(outer_name, node.init.left.type), lower_bound)
    outer_test = C.Lt(C.SymbolRef(outer_name), upper_bound)
    outer_step = C.AddAssign(
        C.SymbolRef(outer_name), C.Constant(self.block_factor))
    self.nest.insert(0, C.For(outer_init, outer_test, outer_step, [None]))

    # The inner loop now walks a single block: [outer, min(outer + B, bound)).
    # NOTE: upper_bound is the same node object used in the outer loop's test.
    node.init.right = C.SymbolRef(outer_name)
    block_end = C.Add(C.SymbolRef(outer_name), C.Constant(self.block_factor))
    node.test.right = C.FunctionCall(
        C.SymbolRef("fmin"), [block_end, upper_bound])
def gen_for(loopvar, start, end, body, pragma=""):
    """Create ``for (int loopvar = start; loopvar < end; loopvar++) body``.

    Args:
        loopvar: Name of the loop variable, declared as a C ``int``.
        start: Initial value, wrapped in a ``C.Constant``.
        end: Exclusive upper bound, wrapped in a ``C.Constant``.
        body: Statement list used as the loop body.
        pragma: Pragma value forwarded to ``C.For`` (empty string by default).

    Returns:
        The constructed ``C.For`` node.
    """
    declaration = C.SymbolRef(loopvar, ctypes.c_int())
    init = C.Assign(declaration, C.Constant(start))
    test = C.Lt(C.SymbolRef(loopvar), C.Constant(end))
    step = C.PostInc(C.SymbolRef(loopvar))
    return C.For(init, test, step, body, pragma)
def visit_For(self, node):
    """Visit a loop body and collect index-shifted copies of its statements.

    For every copy index ``k`` in ``1 .. self.factor - 1`` the class-level
    dict ``UnrollStatementsNoJam.new_body[k]`` is reset and then filled:

    * A non-loop statement is deep-copied and ``self.target_var`` is replaced
      by ``target_var + k`` (``unroll_type == 0``) or
      ``self.factor * target_var + k`` (``unroll_type == 1``).
    * A nested ``C.For`` is rebuilt for each ``k`` around the copies gathered
      while visiting it. The rebuilt loop starts at 0 and reuses the nested
      loop's variable name, constant bound (``test.right.value``) and constant
      step (``incr.value.value``).

    The visited statements themselves stay in ``node.body`` unchanged.

    Args:
        node: Loop node whose ``body`` is processed; mutated in place.

    Returns:
        ``node`` with ``body`` replaced by the visited statements.

    Raises:
        ValueError: If a non-loop statement must be copied and
            ``self.unroll_type`` is neither 0 nor 1.
    """
    factor = self.factor
    target = self.target_var

    def shifted_index(offset):
        # Expression substituted for the target variable in copy `offset`.
        if self.unroll_type == 0:
            base = C.SymbolRef(target)
        elif self.unroll_type == 1:
            base = C.Mul(C.Constant(factor), C.SymbolRef(target))
        else:
            raise ValueError(
                "unsupported unroll_type: {!r}".format(self.unroll_type))
        return C.Add(base, C.Constant(offset))

    # new_body is shared class state; start every loop with empty buckets.
    for copy_idx in range(1, factor):
        UnrollStatementsNoJam.new_body[copy_idx] = []

    visited_body = []
    for child in node.body:
        # Snapshot taken before visiting: a nested loop resets the buckets
        # during its own visit, and the snapshot is what gets restored.
        saved = deepcopy(UnrollStatementsNoJam.new_body)
        visited = self.visit(child)

        # NOTE(review): the visited statement is kept as-is (no offset-0
        # rewrite is applied to it) - confirm that is intended for
        # unroll_type == 1.
        visited_body.append(visited)

        if not isinstance(visited, C.For):
            for copy_idx in range(1, factor):
                clone = deepcopy(visited)
                UnrollStatementsNoJam.new_body.setdefault(copy_idx, []).append(
                    util.replace_symbol(target, shifted_index(copy_idx), clone))
        else:
            var = visited.init.left.name
            for copy_idx in range(1, factor):
                # Wrap the copies collected inside the nested loop in a loop
                # of the same extent and append it to the pre-visit bucket.
                saved[copy_idx].append(
                    C.For(
                        C.Assign(C.SymbolRef(var, ctypes.c_int()),
                                 C.Constant(0)),
                        C.Lt(C.SymbolRef(var),
                             C.Constant(visited.test.right.value)),
                        C.AddAssign(C.SymbolRef(var),
                                    C.Constant(visited.incr.value.value)),
                        UnrollStatementsNoJam.new_body[copy_idx]))
            UnrollStatementsNoJam.new_body = deepcopy(saved)

    node.body = visited_body
    return node
def visit_RangeDim(self, node):
    """Lower a ``RangeDim`` node to a C ``for`` loop over one mapping dimension.

    The dimension index is the second argument of the ``range_dim(...)`` call
    that drives ``node.child_for``; the trip count is
    ``len(node.mapping.shape[dim])``. Three shapes of loop are produced:

    * Tiled dimension on a short LRN ensemble (length below SIMDWIDTH): a
      plain loop over ``loop_var`` whose body has ``<loop_var>_outer`` /
      ``<loop_var>_inner`` rewritten to the quotient / remainder of
      ``loop_var + _input_offset_<dim+1>_inner`` by SIMDWIDTH, with the
      offset symbol itself replaced by 0 in between.
    * Any other tiled dimension: an empty ``<loop_var>_outer`` loop is appended
      to ``self.tiled_loops`` and only the ``<loop_var>_inner`` loop is
      returned.
    * Untiled dimension: a plain loop whose body has ``loop_var`` rewritten to
      ``loop_var * step``.

    No unroll pragma is attached to any of the generated loops.

    Args:
        node: ``RangeDim`` node providing ``child_for`` and ``mapping``.

    Returns:
        The generated ``C.For`` node.

    Raises:
        NotImplementedError: If the loop iterator is not a call to
            ``range_dim``.
    """
    range_call = node.child_for.iter
    # Validate the iterator before touching its arguments so unsupported
    # nodes reach NotImplementedError instead of failing on attribute access.
    if not (isinstance(range_call, ast.Call)
            and getattr(range_call.func, "id", None) == "range_dim"):
        raise NotImplementedError()

    # NOTE(review): `.n` is the legacy numeric-literal accessor of Python's
    # ast (deprecated since 3.8) - confirm the supported interpreter range.
    dim = range_call.args[1].n
    step = node.mapping.get_step(dim)
    length = len(node.mapping.shape[dim])
    loop_var = node.child_for.target.id

    def counted_loop(var, bound, stmts):
        # for (int var = 0; var < bound; var += 1) { stmts }
        return C.For(
            C.Assign(C.SymbolRef(var, ctypes.c_int()), C.Constant(0)),
            C.Lt(C.SymbolRef(var), C.Constant(bound)),
            C.AddAssign(C.SymbolRef(var), C.Constant(1)),
            stmts,
        )

    def rewrite_indices(name, make_expr, stmts):
        # A fresh replacement expression is built per statement so that no
        # AST node ends up shared between statements.
        return [
            UpdateInputIndices(name, make_expr()).visit(stmt) for stmt in stmts
        ]

    def dim_is_tiled():
        # The tiling table consulted depends on the pass direction.
        if self.direction == "forward":
            key = "inputs"
        elif self.direction in ["backward", "update_internal"]:
            key = "grad_inputs"
        else:
            return False
        tiling_info = self.ensemble.tiling_info
        return key in tiling_info and any(
            dim == entry[0] for entry in tiling_info[key])

    body = [self.visit(stmt) for stmt in node.child_for.body]

    # FIXME: This check does not cover general cases
    # ANAND - special casing for LRN, needs refactoring
    short_lrn = (isinstance(self.ensemble, latte.ensemble.LRNEnsemble)
                 and length < latte.config.SIMDWIDTH)
    tiled = dim_is_tiled()

    if short_lrn and tiled:
        offset_name = "_input_offset_{}_inner".format(dim + 1)

        def shifted():
            return C.Add(C.SymbolRef(loop_var), C.SymbolRef(offset_name))

        # Order matters: the offset symbols introduced by the first rewrite
        # are visible to the second, and the third introduces them again.
        body = rewrite_indices(
            loop_var + "_outer",
            lambda: C.Div(shifted(), C.Constant(latte.config.SIMDWIDTH)),
            body)
        body = rewrite_indices(offset_name, lambda: C.Constant(0), body)
        body = rewrite_indices(
            loop_var + "_inner",
            lambda: C.Mod(shifted(), C.Constant(latte.config.SIMDWIDTH)),
            body)
        return counted_loop(loop_var, length, body)

    if tiled and not short_lrn:
        outer_loop = counted_loop(
            loop_var + "_outer", length // latte.config.SIMDWIDTH, [])
        self.tiled_loops.append(outer_loop)
        # A forward pass over a dimension shorter than one SIMD vector only
        # iterates over the elements that exist.
        if self.direction == "forward" and length < latte.config.SIMDWIDTH:
            inner_bound = length
        else:
            inner_bound = latte.config.SIMDWIDTH
        return counted_loop(loop_var + "_inner", inner_bound, body)

    # Untiled dimension: scale the loop variable by the mapping's step.
    body = rewrite_indices(
        loop_var,
        lambda: C.Mul(C.SymbolRef(loop_var), C.Constant(step)),
        body)
    return counted_loop(loop_var, length, body)