def test_mul_by_1(self):
    """Multiplying a symbol by the constant 1, on either side, folds to the symbol."""
    operand_orders = (
        lambda: (C.Constant(1), C.SymbolRef("b")),
        lambda: (C.SymbolRef("b"), C.Constant(1)),
    )
    for make_operands in operand_orders:
        left, right = make_operands()
        folded = ConstantFold().visit(C.Mul(left, right))
        self.assertEqual(folded, C.SymbolRef("b"))
def insert_malloc(body, shape, name, dtype, _global=False):
    """Prepend to ``body`` a declaration that allocates the buffer ``name``.

    The inserted ``StringTemplate`` declares ``name`` as a pointer to an array
    whose extents are ``shape[1:]`` and initialises it with a cast call to
    ``_mm_malloc(<element count> * sizeof(<type>), 64)``, where the element
    count is the product of every entry of ``shape``.

    Args:
        body: list of statements; mutated in place (insertion at index 0).
        shape: sequence of extents.
        name: identifier of the declared pointer.
        dtype: numpy dtype, mapped to a C type name through ``ctree.types``.
        _global: when truthy, the type is prefixed with ``"__global "``.
    """
    trailing_extents = "".join("[{}]".format(extent) for extent in shape[1:])

    element_count = 1
    for extent in shape:
        element_count *= extent

    def c_type_name():
        # C spelling of the element type for ``dtype``.
        return ctree.types.codegen_type(
            ctree.types.get_c_type_from_numpy_dtype(dtype)())

    pointer_name = C.SymbolRef(name)
    allocation = C.FunctionCall(
        C.SymbolRef('_mm_malloc'),
        [
            C.Mul(
                C.Constant(element_count),
                C.FunctionCall(C.SymbolRef('sizeof'), [c_type_name()])),
            C.Constant(64),
        ])
    substitutions = {
        "arg_name0": pointer_name,
        "arg_name1": allocation,
        "shape": C.SymbolRef(trailing_extents),
        "cast": C.SymbolRef(trailing_extents),
        "type": C.SymbolRef(c_type_name()),
        "global": C.SymbolRef("__global " if _global else ""),
    }
    declaration = StringTemplate(
        "$global$type (* $arg_name0)$shape = ($global$type (*)$cast) $arg_name1;",
        substitutions)
    body.insert(0, declaration)
def rewrite_arg(self, arg):
    """Rewrite one index of an array reference for prefetching.

    ``arg`` is expected to be either an array reference or the address-of
    (``Ref``) of one.  Starting from the outermost ``ArrayRef`` node the
    method walks down the ``left`` chain until the counter started at
    ``self.dim`` reaches ``-1``; on the way it replaces up to
    ``self.prefetch_num_zeroes`` indices with the constant ``0``.  The index
    of the node it stops at is replaced by
    ``(prefetch_loop_var + prefetch_constant) * prefetch_multiplier``.

    The tree is modified in place.  A bare array reference is returned
    wrapped in ``C.Ref``; any other ``arg`` is returned as given.
    """
    wraps_array_ref = (
        isinstance(arg, C.UnaryOp)
        and isinstance(arg.op, C.Op.Ref)
        and isinstance(arg.arg, C.BinaryOp)
        and isinstance(arg.arg.op, C.Op.ArrayRef)
    )
    is_array_ref = isinstance(arg, C.BinaryOp) and isinstance(
        arg.op, C.Op.ArrayRef)

    if wraps_array_ref:
        cursor = arg.arg
    elif is_array_ref:
        cursor = arg
    else:
        # No array reference found; the attribute accesses below will fail.
        cursor = None

    zeroes_left = self.prefetch_num_zeroes
    level = self.dim
    while level != -1:
        if zeroes_left > 0:
            cursor.right = C.Constant(0)
            zeroes_left -= 1
        cursor = cursor.left
        level += 1

    # The previous index is not used, but reading it raises AttributeError
    # early when the walk ended on a node that has no ``right`` operand.
    old_expr = cursor.right

    shifted_loop_var = C.Add(
        C.SymbolRef(self.prefetch_loop_var),
        C.SymbolRef(self.prefetch_constant))
    cursor.right = C.Mul(shifted_loop_var,
                         C.SymbolRef(self.prefetch_multiplier))

    return C.Ref(arg) if is_array_ref else arg
def visit_If(self, node):
    """Replicate an ``if`` statement once per unrolled iteration.

    The statement is left untouched unless it mentions the unroll target
    variable or one of the already-unrolled variables.  Otherwise
    ``self.factor`` deep copies are produced; in copy ``i`` every unrolled
    variable ``v`` is renamed to ``v_i`` and the target variable is replaced
    by ``target + i`` (``unroll_type == 0``) or ``factor * target + i``
    (``unroll_type == 1``).

    Returns:
        ``node`` itself when nothing needs unrolling, otherwise the list of
        rewritten copies.

    Raises:
        ValueError: if ``self.unroll_type`` is neither 0 nor 1.
    """
    tracked = list(self.unrolled_vars) + [self.target_var]
    if not any(util.contains_symbol(node, var) for var in tracked):
        return node

    copies = []
    for i in range(self.factor):
        stmt = deepcopy(node)
        for var in self.unrolled_vars:
            stmt = util.replace_symbol(
                var, C.SymbolRef(var + "_" + str(i)), stmt)

        if self.unroll_type == 0:
            index = C.Add(C.SymbolRef(self.target_var), C.Constant(i))
        elif self.unroll_type == 1:
            index = C.Add(
                C.Mul(C.Constant(self.factor),
                      C.SymbolRef(self.target_var)),
                C.Constant(i))
        else:
            # The original ``assert (false)`` referenced an undefined name
            # (NameError) and vanished under ``python -O``; fail explicitly.
            raise ValueError(
                "unsupported unroll_type: {!r} (expected 0 or 1)".format(
                    self.unroll_type))

        copies.append(util.replace_symbol(self.target_var, index, stmt))
    return copies
def visit_BinaryOp(self, node):
    """Replicate an assignment once per unrolled iteration.

    Only ``Assign`` operations whose right-hand side mentions the unroll
    target variable or an already-unrolled variable are rewritten.  When the
    left-hand side carries a non-``None`` ``type`` attribute, its ``name``
    is recorded in ``self.unrolled_vars`` so that each copy gets its own
    ``name_i`` variable.  In copy ``i`` the target variable is replaced by
    ``target + i`` (``unroll_type == 0``) or ``factor * target + i``
    (``unroll_type == 1``).

    Returns:
        ``node`` itself when nothing needs unrolling, otherwise the list of
        rewritten copies.

    Raises:
        ValueError: if ``self.unroll_type`` is neither 0 nor 1.
    """
    if not isinstance(node.op, C.Op.Assign):
        return node

    tracked = list(self.unrolled_vars) + [self.target_var]
    if not any(util.contains_symbol(node.right, var) for var in tracked):
        return node

    if hasattr(node.left, 'type') and node.left.type is not None:
        self.unrolled_vars.add(node.left.name)

    copies = []
    for i in range(self.factor):
        stmt = deepcopy(node)
        for var in self.unrolled_vars:
            stmt = util.replace_symbol(
                var, C.SymbolRef(var + "_" + str(i)), stmt)

        if self.unroll_type == 0:
            index = C.Add(C.SymbolRef(self.target_var), C.Constant(i))
        elif self.unroll_type == 1:
            index = C.Add(
                C.Mul(C.Constant(self.factor),
                      C.SymbolRef(self.target_var)),
                C.Constant(i))
        else:
            # The original ``assert (false)`` referenced an undefined name
            # (NameError) and vanished under ``python -O``; fail explicitly.
            raise ValueError(
                "unsupported unroll_type: {!r} (expected 0 or 1)".format(
                    self.unroll_type))

        copies.append(util.replace_symbol(self.target_var, index, stmt))
    return copies
def gen_loop_index(self, loopvars, shape):
    """Combine per-dimension loop variables into a single index expression.

    The last loop variable is used as is; every earlier variable
    ``loopvars[i]`` is multiplied by the product of ``shape[i + 1:]`` and
    added in front, working from the second-to-last variable back to the
    first.

    Returns:
        The resulting nested ``C.Add`` expression (a bare ``C.SymbolRef``
        when there is a single loop variable).
    """
    index_expr = C.SymbolRef(loopvars[-1])
    for axis in range(len(loopvars) - 2, -1, -1):
        scaled = C.Mul(
            C.SymbolRef(loopvars[axis]),
            C.Constant(np.prod(shape[axis + 1:])))
        index_expr = C.Add(scaled, index_expr)
    return index_expr
def test_no_folding(self):
    """Binary operations on two symbols come back from ConstantFold equal to the input."""
    unfoldable = [
        make_op(C.SymbolRef("a"), C.SymbolRef("b"))
        for make_op in (C.Add, C.Sub, C.Mul, C.Div)
    ]
    for original in unfoldable:
        folded = ConstantFold().visit(original)
        self.assertEqual(original, folded)
def test_mul_constant(self):
    """A product of two constants folds into a single constant."""
    product = C.Mul(C.Constant(20), C.Constant(10))
    folded = ConstantFold().visit(product)
    expected = C.Constant(200)
    self.assertEqual(folded, expected)
def visit_For(self, node):
    """Visit a loop body and collect the extra unrolled copies of its statements.

    The class-level dict ``UnrollStatementsNoJam.new_body`` maps a copy
    number ``j`` (``1 .. factor - 1``) to the statements generated for that
    copy; its entries are reset to empty lists on entry.

    For each statement of ``node.body`` the visited result stays in the loop
    body, and:

    * if the result is not a ``C.For``, copies ``1 .. factor - 1`` of it,
      with the target variable replaced by ``target + i``
      (``unroll_type == 0``) or ``factor * target + i``
      (``unroll_type == 1``), are appended to ``new_body[i]``;
    * if the result is a ``C.For``, the statements gathered in
      ``new_body[j]`` while visiting it are wrapped in a new ``C.For`` that
      reuses the nested loop's variable, upper bound and increment (starting
      at 0), and that loop is appended to the snapshot of ``new_body`` taken
      before the visit, which then becomes the current ``new_body``.

    Returns:
        ``node`` with its body replaced by the visited statements.

    Raises:
        ValueError: if a non-loop statement has to be copied and
            ``self.unroll_type`` is neither 0 nor 1.
    """
    for j in range(1, self.factor):
        UnrollStatementsNoJam.new_body[j] = []

    newbody = []
    for s in node.body:
        # Snapshot taken *before* visiting: a nested loop resets and refills
        # ``new_body`` during the visit.
        temp = deepcopy(UnrollStatementsNoJam.new_body)
        t = self.visit(s)
        stmt2 = deepcopy(t)
        stmt = deepcopy(t)

        # NOTE(review): the result of this offset-0 rewrite is never used and
        # ``stmt`` is a private copy, so the call looks dead; it is kept
        # because ``util.replace_symbol`` is not visible here. Confirm and
        # remove.
        if self.unroll_type == 0:
            util.replace_symbol(
                self.target_var,
                C.Add(C.SymbolRef(self.target_var), C.Constant(0)), stmt)
        else:
            util.replace_symbol(
                self.target_var,
                C.Add(
                    C.Mul(C.Constant(self.factor),
                          C.SymbolRef(self.target_var)),
                    C.Constant(0)), stmt)
        newbody.append(t)

        if not isinstance(t, C.For):
            for i in range(1, self.factor):
                stmt = deepcopy(stmt2)
                if self.unroll_type == 0:
                    index = C.Add(C.SymbolRef(self.target_var),
                                  C.Constant(i))
                elif self.unroll_type == 1:
                    index = C.Add(
                        C.Mul(C.Constant(self.factor),
                              C.SymbolRef(self.target_var)),
                        C.Constant(i))
                else:
                    # The original ``assert (false)`` referenced an undefined
                    # name (NameError) and vanished under ``python -O``.
                    raise ValueError(
                        "unsupported unroll_type: {!r} (expected 0 or 1)"
                        .format(self.unroll_type))
                UnrollStatementsNoJam.new_body.setdefault(i, []).append(
                    util.replace_symbol(self.target_var, index, stmt))
        else:
            var = t.init.left.name
            for j in range(1, self.factor):
                temp[j].append(
                    C.For(
                        C.Assign(C.SymbolRef(var, ctypes.c_int()),
                                 C.Constant(0)),
                        C.Lt(C.SymbolRef(var),
                             C.Constant(t.test.right.value)),
                        C.AddAssign(C.SymbolRef(var),
                                    C.Constant(t.incr.value.value)),
                        UnrollStatementsNoJam.new_body[j]))
            UnrollStatementsNoJam.new_body = deepcopy(temp)

    node.body = newbody
    return node
def visit_AugAssign(self, node):
    """Replicate an augmented assignment once per unrolled iteration.

    The statement is left untouched unless its value mentions the unroll
    target variable or an already-unrolled variable.  For a plain symbol
    target, the name returned by ``self._get_name`` is recorded in
    ``self.unrolled_vars`` so that each copy updates its own ``name_i``
    variable.  In copy ``i`` the target variable is replaced by
    ``target + i`` (``unroll_type == 0``) or ``factor * target + i``
    (``unroll_type == 1``).

    Returns:
        ``node`` itself when nothing needs unrolling, otherwise the list of
        rewritten copies.

    Raises:
        AssertionError: if the assignment target is an array reference
            (only while assertions are enabled).
        NotImplementedError: if the target is any other kind of node.
        ValueError: if ``self.unroll_type`` is neither 0 nor 1.
    """
    tracked = list(self.unrolled_vars) + [self.target_var]
    if not any(util.contains_symbol(node.value, var) for var in tracked):
        return node

    if isinstance(node.target, C.SymbolRef):
        self.unrolled_vars.add(self._get_name(node.target.name))
    elif isinstance(node.target, C.BinaryOp) and isinstance(
            node.target.op, C.Op.ArrayRef):
        # Array-reference targets are rejected by this assertion.  With
        # assertions disabled (``python -O``) execution continues into the
        # unrolling loop below, exactly as it did before.
        assert False
    else:
        raise NotImplementedError()

    body = []
    for i in range(self.factor):
        stmt = deepcopy(node)
        for var in self.unrolled_vars:
            stmt = util.replace_symbol(
                var, C.SymbolRef(var + "_" + str(i)), stmt)

        if self.unroll_type == 0:
            index = C.Add(C.SymbolRef(self.target_var), C.Constant(i))
        elif self.unroll_type == 1:
            index = C.Add(
                C.Mul(C.Constant(self.factor),
                      C.SymbolRef(self.target_var)),
                C.Constant(i))
        else:
            # The original ``assert (false)`` referenced an undefined name
            # (NameError) and vanished under ``python -O``; fail explicitly.
            raise ValueError(
                "unsupported unroll_type: {!r} (expected 0 or 1)".format(
                    self.unroll_type))

        body.append(util.replace_symbol(self.target_var, index, stmt))
    return body
def visit_RangeDim(self, node):
    """Lower a ``range_dim`` loop into a C ``for`` loop.

    The loop body is visited first.  What is returned then depends on
    whether dimension ``dim`` is listed in the ensemble's tiling info for
    the current direction (``"inputs"`` for ``"forward"``, ``"grad_inputs"``
    for ``"backward"`` / ``"update_internal"``):

    * not tiled: a loop over ``loop_var`` from 0 to ``length`` whose body
      has ``loop_var`` scaled by the mapping's step;
    * tiled, and the ensemble is an ``LRNEnsemble`` with ``length`` below
      ``SIMDWIDTH``: a loop over ``loop_var`` whose body has the
      ``_outer`` / ``_inner`` indices rewritten in terms of ``loop_var``;
    * tiled otherwise: an ``_outer`` loop is appended to
      ``self.tiled_loops`` and the ``_inner`` loop is returned.

    Raises:
        NotImplementedError: if the loop iterator is not a call to
            ``range_dim``.
    """
    range_call = node.child_for.iter
    # ensemble, ndim and offset are not used further down; they are still
    # read here, in the same order as before.
    ensemble = node.ensemble
    ndim = node.mapping.ndim
    dim = range_call.args[1].n
    offset = node.mapping.get_offset(dim)
    step = node.mapping.get_step(dim)
    length = len(node.mapping.shape[dim])

    if not (isinstance(range_call, ast.Call)
            and range_call.func.id == "range_dim"):
        raise NotImplementedError()

    loop_var = node.child_for.target.id
    body = [self.visit(s) for s in node.child_for.body]

    def counted_loop(var_name, bound, statements):
        # ``for (int var_name = 0; var_name < bound; var_name += 1)``
        return C.For(
            C.Assign(C.SymbolRef(var_name, ctypes.c_int()), C.Constant(0)),
            C.Lt(C.SymbolRef(var_name), C.Constant(bound)),
            C.AddAssign(C.SymbolRef(var_name), C.Constant(1)),
            statements,
        )

    def strided_loop(statements):
        # Untiled case: scale the loop variable by the mapping's step.
        scaled = [
            UpdateInputIndices(
                loop_var,
                C.Mul(C.SymbolRef(loop_var), C.Constant(step))).visit(s)
            for s in statements
        ]
        return counted_loop(loop_var, length, scaled)

    def dim_is_tiled():
        # Is ``dim`` listed in the tiling info for the current direction?
        if self.direction == "forward":
            key = "inputs"
        elif self.direction in ["backward", "update_internal"]:
            key = "grad_inputs"
        else:
            return False
        return key in self.ensemble.tiling_info and any(
            dim == entry[0] for entry in self.ensemble.tiling_info[key])

    # FIXME: This check does not cover general cases
    # Special casing for LRN, needs refactoring
    small_lrn = isinstance(
        self.ensemble,
        latte.ensemble.LRNEnsemble) and length < latte.config.SIMDWIDTH

    if not dim_is_tiled():
        return strided_loop(body)

    if small_lrn:
        offset_name = "_input_offset_{}_inner".format(dim + 1)

        def shifted_index():
            return C.Add(C.SymbolRef(loop_var), C.SymbolRef(offset_name))

        # A fresh transformer and a fresh replacement expression are built
        # for every statement in every pass.
        rewrites = [
            (loop_var + "_outer",
             lambda: C.Div(shifted_index(),
                           C.Constant(latte.config.SIMDWIDTH))),
            (offset_name, lambda: C.Constant(0)),
            (loop_var + "_inner",
             lambda: C.Mod(shifted_index(),
                           C.Constant(latte.config.SIMDWIDTH))),
        ]
        for symbol, make_replacement in rewrites:
            body = [
                UpdateInputIndices(symbol, make_replacement()).visit(s)
                for s in body
            ]
        return counted_loop(loop_var, length, body)

    outer_loop = counted_loop(
        loop_var + "_outer", length // latte.config.SIMDWIDTH, [])
    self.tiled_loops.append(outer_loop)

    if self.direction == "forward" and length < latte.config.SIMDWIDTH:
        inner_bound = length
    else:
        inner_bound = latte.config.SIMDWIDTH
    return counted_loop(loop_var + "_inner", inner_bound, body)