def visit_For(self, node):
    """Unroll the loop whose induction variable is ``self.target_var``.

    Every statement in the loop body is visited first. A loop over any
    other variable is returned with only that change applied.

    For the target loop the header is rewritten according to
    ``self.unroll_type``:

    * ``0`` -- the increment becomes ``target_var += factor``.
    * ``1`` -- the constant upper bound is divided by ``factor``; the bound
      must be an exact multiple of ``factor``.

    The body is then rewritten by ``UnrollStatements`` and flattened.

    Raises:
        AssertionError: if ``unroll_type`` is neither 0 nor 1, or if the
            bound is not divisible by ``factor`` for ``unroll_type == 1``.
            Raised explicitly so the checks are not stripped under ``-O``.
    """
    node.body = [self.visit(stmt) for stmt in node.body]
    if node.init.left.name != self.target_var:
        return node

    if self.unroll_type == 0:
        node.incr = C.AddAssign(C.SymbolRef(self.target_var),
                                C.Constant(self.factor))
    elif self.unroll_type == 1:
        trip_count = node.test.right.value
        if trip_count % self.factor != 0:
            raise AssertionError(
                "loop bound {} is not a multiple of unroll factor {}".format(
                    trip_count, self.factor))
        node.test.right.value = trip_count // self.factor
    else:
        raise AssertionError(
            "unsupported unroll_type: {!r}".format(self.unroll_type))

    unroller = UnrollStatements(self.target_var, self.factor,
                                self.unroll_type)
    node.body = util.flatten([unroller.visit(stmt) for stmt in node.body])
    return node
def visit_For(self, node):
    """Unroll the loop over ``self.target_var`` by ``self.factor``.

    The loop body is always visited first. A loop over a different variable
    is returned as-is afterwards. For the target loop the increment becomes
    ``target_var += factor`` and the body is rewritten by
    ``UnrollStatements`` and flattened.

    Returns:
        The (mutated) loop node, or -- when the loop's constant bound equals
        the unroll factor -- a list holding the body statements with the loop
        variable replaced by the constant ``0``.
    """
    node.body = [self.visit(stmt) for stmt in node.body]

    loop_var = node.init.left.name
    if loop_var != self.target_var:
        return node

    node.incr = C.AddAssign(C.SymbolRef(self.target_var),
                            C.Constant(self.factor))
    unroller = UnrollStatements(self.target_var, self.factor)
    node.body = util.flatten([unroller.visit(stmt) for stmt in node.body])

    if node.test.right.value != self.factor:
        return node

    # The loop would run exactly once, so emit its statements directly with
    # the induction variable pinned to zero.
    return [
        util.replace_symbol(loop_var, C.Constant(0), stmt)
        for stmt in node.body
    ]
def block_loop(self, node):
    """Block ``node`` by ``self.block_factor`` using a new outer loop.

    The outer loop's variable is the inner loop's variable name repeated
    twice (``i`` -> ``ii``). The outer loop takes over the original start
    value and bound, steps by ``self.block_factor``, and is inserted at the
    front of ``self.nest`` with a ``[None]`` placeholder body.

    ``node`` is then mutated in place so that it starts at the outer variable
    and stops at ``fmin(outer + block_factor, original_bound)``.

    Returns nothing.
    """
    inner_var = node.init.left.name
    outer_var = inner_var + inner_var

    # Build the outer loop from the inner loop's *original* start and bound,
    # before those fields are overwritten below.
    outer_init = C.Assign(C.SymbolRef(outer_var, node.init.left.type),
                          node.init.right)
    outer_test = C.Lt(C.SymbolRef(outer_var), node.test.right)
    outer_step = C.AddAssign(C.SymbolRef(outer_var),
                             C.Constant(self.block_factor))
    self.nest.insert(0, C.For(outer_init, outer_test, outer_step, [None]))

    node.init.right = C.SymbolRef(outer_var)
    block_end = C.Add(C.SymbolRef(outer_var), C.Constant(self.block_factor))
    node.test.right = C.FunctionCall(C.SymbolRef("fmin"),
                                     [block_end, node.test.right])
def visit_FunctionDecl(self, node):
    """Wrap each top-level ``for`` loop of the function in timing code.

    Every statement in ``node.defn`` that is an ``ast.For`` or ``C.For`` is
    surrounded by ``times[k] -= omp_get_wtime()`` before it and
    ``times[k] += omp_get_wtime()`` after it, where ``k`` counts the timed
    loops in order. Other statements are kept unchanged.

    The new body starts with a ``*times`` declaration followed by a
    ``calloc_doubles(count)`` assignment, and ends with one ``printf`` call
    per timed loop.

    Returns:
        ``node`` with ``node.defn`` replaced by the instrumented list.
    """
    def slot(index):
        # Fresh ``times[index]`` reference for each use.
        return C.ArrayRef(C.SymbolRef('times'), C.Constant(index))

    instrumented = []
    timed = 0
    for stmt in node.defn:
        if not isinstance(stmt, (ast.For, C.For)):
            instrumented.append(stmt)
            continue
        start = C.SubAssign(slot(timed), C.FunctionCall('omp_get_wtime', []))
        stop = C.AddAssign(slot(timed), C.FunctionCall('omp_get_wtime', []))
        instrumented += [start, stmt, stop]
        timed += 1

    allocate = C.Assign(
        C.SymbolRef('times'),
        C.FunctionCall(C.SymbolRef('calloc_doubles'), [C.Constant(timed)]))
    declare = C.Assign(C.SymbolRef("*times", ctypes.c_double()),
                       C.Constant(0))

    reports = [
        C.FunctionCall(
            C.SymbolRef('printf'),
            [C.String("\ttimes[%d] = %g\\n"), C.Constant(k), slot(k)])
        for k in range(timed)
    ]

    node.defn = [declare, allocate] + instrumented + reports
    return node
def visit_For(self, node):
    """Rewrite an ``ivdep`` loop into a Hwacha strip-mined block.

    For a loop whose ``pragma`` is ``"ivdep"``:

    * each array reference in the body gets a ``va<N>`` register and each
      scalar a ``vs<N>`` register (the scalar's ctype is recorded in
      ``self.type_map``);
    * the loop increment becomes ``loopvar += vector_length``;
    * the result of ``get_asm_body`` is appended to ``self.defns``;
    * the loop body is replaced by the bounds check, scalar initialisers,
      vector-length query and an inline-asm statement that loads the
      registers and launches ``__hwacha_body``.

    Returns:
        A list ``[configure_block, node, fence]`` for ``ivdep`` loops,
        otherwise ``None``.
    """
    if node.pragma != "ivdep":
        # NOTE(review): other loops fall through with an implicit None, as
        # before. If this visitor is an ast.NodeTransformer that would drop
        # the loop -- confirm this is intended.
        return None

    loopvar = node.incr.arg
    size = node.test.right
    scalars = get_scalars_in_body(node)
    refs = get_array_references_in_body(node)

    ref_register_map = {}
    for position, ref in enumerate(refs):
        ref_register_map[str(ref)] = (ref, "va{}".format(position))

    scalar_register_map = {}
    for position, scalar in enumerate(scalars):
        register = "vs{}".format(position)
        scalar_register_map[scalar] = register
        self.type_map[register] = get_ctype(scalar)

    block = [StringTemplate(hwacha_configure_block.format(SIZE=size))]
    node.incr = C.AddAssign(loopvar, C.SymbolRef("vector_length"))
    # get_asm_body runs while the node still has its original body.
    self.defns.append(
        get_asm_body(node, scalar_register_map, ref_register_map,
                     self.type_map))
    block.append(node)

    body = [StringTemplate(bounds_check.format(SIZE=size, loopvar=loopvar))]
    body.extend(scalar_init(scalar) for scalar in scalars)
    body.append(
        StringTemplate(
            obtained_vector_length.format(SIZE=size, loopvar=loopvar)))

    # asm_lines holds the instruction strings and operands the matching
    # input operands; the positional index %N of the next operand is always
    # len(operands).
    asm_lines = []
    operands = []
    for ref, register in ref_register_map.values():
        asm_lines.append(
            "\t \"vmsa {0}, %{1}\\n\"\n".format(register, len(operands)))
        operands.append(
            "\"r\"({0} + {1}),\n".format(ref.left.name, ref.right.name))
    for scalar, register in scalar_register_map.items():
        asm_lines.append(
            "\t \"vmss {0}, %{1}\\n\"\n".format(register, len(operands)))
        operands.append("\"r\"({0}.i),\n".format(
            "".join(number_dict[digit] for digit in str(scalar))))
    asm_lines.append("\"fence\\n\"\n")
    asm_lines.append("\"vf 0(%{0})\\n\"\n".format(len(operands)))
    operands.append("\"r\" (&__hwacha_body)")

    block1 = "".join(asm_lines)
    block2 = "".join(operands)
    body.append(StringTemplate(
        """ __asm__ volatile( {block1} : : {block2} : "memory" ); """.format(
            block1=block1, block2=block2)))
    node.body = body

    block.append(StringTemplate(""" __asm__ volatile( "fence\\n" ); """))
    return block
def visit_For(self, node):
    """Unroll the loop over ``self.unroll_var`` without jamming the copies.

    Every statement in the loop body is visited first. A loop over any
    other variable is returned with only that change applied.

    For the target loop the header is rewritten according to
    ``self.unroll_type``:

    * ``0`` -- the increment becomes ``var += factor``.
    * ``1`` -- the constant upper bound is divided by ``factor``; the bound
      must be an exact multiple of ``factor``.

    ``UnrollStatementsNoJam`` then visits the loop and collects the extra
    copies in its class-level ``new_body`` dict, keyed by copy index
    ``1 .. factor - 1``. Those copies are appended to the loop body in index
    order and the body is flattened.

    Raises:
        AssertionError: if ``unroll_type`` is neither 0 nor 1, or if the
            bound is not divisible by ``factor`` for ``unroll_type == 1``.
            Raised explicitly so the checks are not stripped under ``-O``.
    """
    node.body = [self.visit(stmt) for stmt in node.body]

    var = node.init.left.name
    if var != self.unroll_var:
        return node

    factor, unroll_type = self.unroll_factor, self.unroll_type
    if unroll_type == 0:
        node.incr = C.AddAssign(C.SymbolRef(var), C.Constant(factor))
    elif unroll_type == 1:
        trip_count = node.test.right.value
        if trip_count % factor != 0:
            raise AssertionError(
                "loop bound {} is not a multiple of unroll factor {}".format(
                    trip_count, factor))
        node.test.right.value = trip_count // factor
    else:
        raise AssertionError(
            "unsupported unroll_type: {!r}".format(unroll_type))

    # new_body is shared class-level state: reset it before the statement
    # visitor runs, then read it back afterwards.
    UnrollStatementsNoJam.new_body = {}
    for copy_index in range(1, factor):
        UnrollStatementsNoJam.new_body[copy_index] = []

    visitor = UnrollStatementsNoJam(self.unroll_var, self.unroll_factor,
                                    self.unroll_type)
    node = visitor.visit(node)

    for copy_index in range(1, factor):
        node.body.extend(UnrollStatementsNoJam.new_body[copy_index])
    node.body = util.flatten(node.body)
    return node
def visit_For(self, node):
    """Collect the un-jammed copies of every statement in a loop body.

    The extra copies are accumulated in the class-level dict
    ``UnrollStatementsNoJam.new_body``, keyed by copy index
    ``1 .. factor - 1``; the entries for those indices are reset on entry.

    For each body statement (after visiting it):

    * a non-loop statement contributes one deep copy per index ``i``, with
      ``target_var`` replaced by ``target_var + i`` (``unroll_type == 0``)
      or ``factor * target_var + i`` (``unroll_type == 1``);
    * a nested ``C.For`` contributes, per index, a new loop over the same
      variable wrapping the copies gathered while visiting that nested loop.
      The new loop starts at constant 0 and reuses the nested loop's
      constant bound and step.

    Returns:
        ``node`` with its body replaced by the visited statements.

    Raises:
        AssertionError: if a non-loop statement has to be copied and
            ``unroll_type`` is neither 0 nor 1.
    """
    def shifted_index(offset):
        # Expression substituted for the loop variable in copy ``offset``.
        if self.unroll_type == 0:
            base = C.SymbolRef(self.target_var)
        elif self.unroll_type == 1:
            base = C.Mul(C.Constant(self.factor),
                         C.SymbolRef(self.target_var))
        else:
            # The original wrote ``assert (false)``, which raised NameError.
            raise AssertionError(
                "unsupported unroll_type: {!r}".format(self.unroll_type))
        return C.Add(base, C.Constant(offset))

    for j in range(1, self.factor):
        UnrollStatementsNoJam.new_body[j] = []

    newbody = []
    for s in node.body:
        # Snapshot taken before visiting: visiting a nested loop resets
        # new_body, and the snapshot is what gets restored afterwards.
        temp = deepcopy(UnrollStatementsNoJam.new_body)
        t = self.visit(s)
        pristine = deepcopy(t)

        # NOTE(review): the original also built an offset-0 rewrite of a
        # copy of ``t`` here and discarded the result; ``t`` itself is
        # appended unmodified. Confirm whether copy 0 should be rewritten
        # (notably for unroll_type == 1).
        newbody.append(t)

        if not isinstance(t, C.For):
            for i in range(1, self.factor):
                stmt = deepcopy(pristine)
                rewritten = util.replace_symbol(self.target_var,
                                                shifted_index(i), stmt)
                UnrollStatementsNoJam.new_body.setdefault(i, []).append(
                    rewritten)
        else:
            var = t.init.left.name
            for j in range(1, self.factor):
                temp[j].append(
                    C.For(
                        C.Assign(C.SymbolRef(var, ctypes.c_int()),
                                 C.Constant(0)),
                        C.Lt(C.SymbolRef(var),
                             C.Constant(t.test.right.value)),
                        C.AddAssign(C.SymbolRef(var),
                                    C.Constant(t.incr.value.value)),
                        UnrollStatementsNoJam.new_body[j]))
            UnrollStatementsNoJam.new_body = deepcopy(temp)

    node.body = newbody
    return node
def visit_RangeDim(self, node):
    """Lower a ``range_dim(...)`` loop node into a ``C.For`` loop.

    ``node.child_for`` is the Python ``for`` being lowered. Its iterator
    must be a call to ``range_dim``, whose second argument gives the
    dimension index. The loop length is ``len(node.mapping.shape[dim])``.

    Three shapes of loop are produced:

    * dimension not tiled -- a loop over ``loop_var`` from 0 to ``length``
      whose body has ``loop_var`` rewritten to ``loop_var * step``;
    * dimension tiled, ensemble is an ``LRNEnsemble`` and
      ``length < SIMDWIDTH`` -- the same plain loop, but with the
      ``_outer`` / ``_inner`` indices and the inner input offset rewritten
      in the body;
    * dimension tiled otherwise -- an ``_outer`` loop with an empty body is
      appended to ``self.tiled_loops`` and the ``_inner`` loop carrying the
      body is returned.

    Raises:
        NotImplementedError: if the iterator is not a ``range_dim`` call.
    """
    range_call = node.child_for.iter
    # ensemble, ndim and offset are not used below; the lookups are kept so
    # attribute access and the get_offset() call happen exactly as before.
    ensemble = node.ensemble
    ndim = node.mapping.ndim
    dim = range_call.args[1].n
    offset = node.mapping.get_offset(dim)
    step = node.mapping.get_step(dim)
    length = len(node.mapping.shape[dim])

    if not (isinstance(range_call, ast.Call)
            and range_call.func.id == "range_dim"):
        raise NotImplementedError()

    loop_var = node.child_for.target.id
    body = [self.visit(s) for s in node.child_for.body]

    def counted_loop(name, bound, stmts):
        # for (int name = 0; name < bound; name += 1) { stmts }
        return C.For(
            C.Assign(C.SymbolRef(name, ctypes.c_int()), C.Constant(0)),
            C.Lt(C.SymbolRef(name), C.Constant(bound)),
            C.AddAssign(C.SymbolRef(name), C.Constant(1)),
            stmts,
        )

    def listed_in(key):
        # True when tiling_info[key] has an entry whose first item is dim.
        info = self.ensemble.tiling_info
        return key in info and any(dim == entry[0] for entry in info[key])

    def dim_is_tiled():
        if self.direction == "forward" and listed_in("inputs"):
            return True
        return (self.direction in ["backward", "update_internal"]
                and listed_in("grad_inputs"))

    # FIXME: This check does not cover general cases
    # ANAND: special casing for LRN, needs refactoring
    small_lrn = (isinstance(self.ensemble, latte.ensemble.LRNEnsemble)
                 and length < latte.config.SIMDWIDTH)
    tiled = dim_is_tiled()

    if not tiled:
        body = [
            UpdateInputIndices(
                loop_var,
                C.Mul(C.SymbolRef(loop_var), C.Constant(step))).visit(s)
            for s in body
        ]
        return counted_loop(loop_var, length, body)

    if small_lrn:
        inner_offset = "_input_offset_{}_inner".format(dim + 1)

        def shifted():
            # Fresh ``loop_var + <inner offset>`` expression per use.
            return C.Add(C.SymbolRef(loop_var), C.SymbolRef(inner_offset))

        # Three successive rewrites; their order is significant.
        body = [
            UpdateInputIndices(
                loop_var + "_outer",
                C.Div(shifted(),
                      C.Constant(latte.config.SIMDWIDTH))).visit(s)
            for s in body
        ]
        body = [
            UpdateInputIndices(inner_offset, C.Constant(0)).visit(s)
            for s in body
        ]
        body = [
            UpdateInputIndices(
                loop_var + "_inner",
                C.Mod(shifted(),
                      C.Constant(latte.config.SIMDWIDTH))).visit(s)
            for s in body
        ]
        return counted_loop(loop_var, length, body)

    outer_loop = counted_loop(loop_var + "_outer",
                              length // latte.config.SIMDWIDTH, [])
    self.tiled_loops.append(outer_loop)

    if self.direction == "forward" and length < latte.config.SIMDWIDTH:
        inner_bound = length
    else:
        inner_bound = latte.config.SIMDWIDTH
    return counted_loop(loop_var + "_inner", inner_bound, body)