def __init__(self, func_ir, typemap, calltypes):
    """
    Keep references to the function IR and its typing information, and
    precompute the lookup tables derived from the IR blocks.

    Args:
        func_ir: function IR; its ``blocks`` attribute feeds the call
            table and the tuple table.
        typemap: stored as ``self.typemap``.
        calltypes: stored as ``self.calltypes``.
    """
    self.func_ir = func_ir
    self.typemap = typemap
    self.calltypes = calltypes

    # get_call_table returns a pair; only the first element is kept.
    call_table, _unused = get_call_table(func_ir.blocks)
    self._call_table = call_table
    self._tuple_table = get_tuple_table(func_ir.blocks)

    # Both sets start out empty.
    self._parallel_accesses = set()
    self._T_arrs = set()
def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood,
                          standard_indexed):
    """
    Transforms the stencil kernel as specified by the user into one
    that includes each dimension's index variable as part of the getitem
    calls.  So, in effect array[-1] becomes array[index0-1].

    Args:
        kernel: kernel IR.  The bodies of ``kernel.blocks`` are rewritten
            in place; ``kernel.arg_names`` identifies the arrays passed to
            the kernel.
        index_names: names of the per-dimension index variables;
            ``index_names[d]`` is added to the offset of dimension ``d``.
        ndim: number of dimensions of the input array.
        neighborhood: ``None`` to infer the neighborhood from the constant
            indices found in the kernel, otherwise a sequence with one
            entry per dimension (it is returned unchanged).
        standard_indexed: names of kernel arguments whose getitems are
            left untouched.

    Returns:
        ``(neighborhood, relatively_indexed)``.  When inferred,
        ``neighborhood`` is a list of ``[min, max]`` offsets per dimension,
        each starting at ``[0, 0]``.  ``relatively_indexed`` is the set of
        argument names that were accessed with relative indexing.

    Raises:
        ValueError: if ``neighborhood`` does not have ``ndim`` entries, if
            the kernel stores into one of its arguments, if the
            neighborhood must be inferred but an index is not a constant
            integer (or tuple of them), if no relative access exists, or
            if an index does not have exactly ``ndim`` components.
    """
    const_dict = {}
    kernel_consts = []

    if config.DEBUG_ARRAY_OPT == 1:
        print("add_indices_to_kernel", ndim, neighborhood)
        ir_utils.dump_blocks(kernel.blocks)

    need_to_calc_kernel = neighborhood is None
    if not need_to_calc_kernel and len(neighborhood) != ndim:
        raise ValueError("%d dimensional neighborhood specified for %d "
                         "dimensional input array"
                         % (len(neighborhood), ndim))

    tuple_table = ir_utils.get_tuple_table(kernel.blocks)
    relatively_indexed = set()

    for block in kernel.blocks.values():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Const)):
                if config.DEBUG_ARRAY_OPT == 1:
                    print("remembering in const_dict", stmt.target.name,
                          stmt.value.value)
                # Remember consts for use later.
                const_dict[stmt.target.name] = stmt.value.value

            # Reject every form of store into an array passed to the
            # kernel, including the static variant of setitem.
            if ((isinstance(stmt, ir.Assign) and
                 isinstance(stmt.value, ir.Expr) and
                 stmt.value.op in ('setitem', 'static_setitem') and
                 stmt.value.value.name in kernel.arg_names) or
                    (isinstance(stmt, (ir.SetItem, ir.StaticSetItem)) and
                     stmt.target.name in kernel.arg_names)):
                raise ValueError("Assignments to arrays passed to stencil "
                                 "kernels is not allowed.")

            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Expr) and
                    stmt.value.op in ('getitem', 'static_getitem') and
                    stmt.value.value.name in kernel.arg_names and
                    stmt.value.value.name not in standard_indexed):
                # We found a getitem from the input array.
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    # allow static_getitem since rewrite passes are applied
                    stmt_index_var = stmt.value.index_var

                relatively_indexed.add(stmt.value.value.name)

                # Store the index used after looking up the variable in
                # the tuple table or the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts.append(
                            tuple_table[stmt_index_var.name])
                    elif stmt_index_var.name in const_dict:
                        kernel_consts.append(
                            const_dict[stmt_index_var.name])
                    else:
                        raise ValueError(
                            "stencil kernel index is not "
                            "constant, 'neighborhood' option required")

                if ndim == 1:
                    # Single dimension always uses index_names[0].
                    # tmpvar will hold the real index and is computed by
                    # adding the relative offset in the getitem index to
                    # the current absolute location.
                    index_var = ir.Var(scope, index_names[0], loc)
                    tmpname = ir_utils.mk_unique_var("stencil_index")
                    tmpvar = ir.Var(scope, tmpname, loc)
                    acc_call = ir.Expr.binop(operator.add, stmt_index_var,
                                             index_var, loc)
                    new_body.append(ir.Assign(acc_call, tmpvar, loc))
                    new_body.append(
                        ir.Assign(
                            ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                            stmt.target, loc))
                else:
                    s_index_name = ir_utils.mk_unique_var("stencil_index")
                    s_index_var = ir.Var(scope, s_index_name, loc)
                    ind_stencils = []

                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        # Constant holding the position inside the tuple.
                        tmpname = ir_utils.mk_unique_var("const_index")
                        const_index_var = ir.Var(scope, tmpname, loc)
                        new_body.append(
                            ir.Assign(ir.Const(dim, loc),
                                      const_index_var, loc))

                        index_var = ir.Var(scope, index_names[dim], loc)

                        tmpname = ir_utils.mk_unique_var(
                            "ind_stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        ind_stencils.append(tmpvar)

                        # offset = index_tuple[dim]
                        getitemname = ir_utils.mk_unique_var("getitem")
                        getitemvar = ir.Var(scope, getitemname, loc)
                        getitemcall = ir.Expr.getitem(
                            stmt_index_var, const_index_var, loc)
                        new_body.append(
                            ir.Assign(getitemcall, getitemvar, loc))

                        # absolute = offset + index variable of this dim
                        acc_call = ir.Expr.binop(operator.add, getitemvar,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))

                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(
                        ir.Assign(tuple_call, s_index_var, loc))
                    new_body.append(
                        ir.Assign(
                            ir.Expr.getitem(stmt.value.value,
                                            s_index_var, loc),
                            stmt.target, loc))
            else:
                new_body.append(stmt)
        block.body = new_body

    if need_to_calc_kernel:
        # Find the extent of the kernel by tracking, per dimension, the
        # smallest and largest constant offset used (starting at 0).
        neighborhood = [[0, 0] for _ in range(ndim)]
        if not kernel_consts:
            raise ValueError("Stencil kernel with no accesses to "
                             "relatively indexed arrays.")

        for index in kernel_consts:
            if isinstance(index, (tuple, list)):
                offsets = index
            elif isinstance(index, int):
                offsets = [index]
            else:
                raise ValueError(
                    "Non-tuple or non-integer used as stencil index.")

            # Validate the rank before touching `neighborhood`: an index
            # with more components than ndim would otherwise fail with
            # an IndexError instead of this ValueError.
            if len(offsets) != ndim:
                raise ValueError(
                    "Stencil index does not match array dimensionality.")

            for bounds, te in zip(neighborhood, offsets):
                if isinstance(te, ir.Var) and te.name in const_dict:
                    te = const_dict[te.name]
                if not isinstance(te, int):
                    raise ValueError(
                        "stencil kernel index is not constant,"
                        "'neighborhood' option required")
                bounds[0] = min(bounds[0], te)
                bounds[1] = max(bounds[1], te)

    return (neighborhood, relatively_indexed)
def _replace_stencil_accesses(self, stencil_blocks, parfor_vars, in_args,
                              index_offsets, stencil_func, arg_to_arr_dict):
    """
    Rewrite the relative array accesses of a stencil kernel as standard
    accesses by adding the loop index variables to every dimension of the
    index, and report the extent of the accesses.

    Args:
        stencil_blocks: mapping of labels to IR blocks; block bodies are
            replaced in place.
        parfor_vars: loop index variables added to the relative indices.
        in_args: input array variables; the first one determines the
            number of dimensions, the scope and the location used for
            newly created IR.
        index_offsets: optional extra offsets that are added to the
            relative index first when truthy.
        stencil_func: object providing ``options`` and ``neighborhood``.
        arg_to_arr_dict: maps kernel argument names to array names.

    Returns:
        ``(start_lengths, end_lengths)``: per-dimension lower and upper
        bounds, taken from ``stencil_func.neighborhood`` when it is given
        and otherwise inferred from the integer indices in the kernel.

    Raises:
        ValueError: for an unknown name in ``standard_indexing``, when the
            first argument is standard indexed, when the kernel assigns to
            an input array, when the neighborhood has to be inferred from
            a non-integer index, or when it has to be inferred and no
            relative access exists.
    """
    first_arr = in_args[0]
    in_arg_names = [arg.name for arg in in_args]

    standard_indexed = []
    if "standard_indexing" in stencil_func.options:
        requested = stencil_func.options["standard_indexing"]
        for arg_name in requested:
            if arg_name not in arg_to_arr_dict:
                raise ValueError(
                    "Standard indexing requested for an array "
                    "name not present in the stencil kernel definition.")
        standard_indexed = [arg_to_arr_dict[arg_name]
                            for arg_name in requested]

    if first_arr.name in standard_indexed:
        raise ValueError(
            "The first argument to a stencil kernel must use "
            "relative indexing, not standard indexing.")

    ndims = self.typemap[first_arr.name].ndim
    scope = first_arr.scope
    loc = first_arr.loc

    # Without a user supplied neighborhood the minimum and maximum seen
    # index of every dimension starts at 0 and is widened below.  With
    # one, it is simply split into the start/end format used here.
    need_to_calc_kernel = stencil_func.neighborhood is None
    if need_to_calc_kernel:
        start_lengths = [0] * ndims
        end_lengths = [0] * ndims
    else:
        start_lengths = [bounds[0] for bounds in stencil_func.neighborhood]
        end_lengths = [bounds[1] for bounds in stencil_func.neighborhood]

    # All tuples defined in the stencil blocks.
    tuple_table = ir_utils.get_tuple_table(stencil_blocks)

    found_relative_index = False

    for block in stencil_blocks.values():
        rewritten = []
        for stmt in block.body:
            is_expr_assign = (isinstance(stmt, ir.Assign) and
                              isinstance(stmt.value, ir.Expr))

            # Stores into the input arrays are not permitted.
            stores_to_input = (
                (is_expr_assign and
                 stmt.value.op in ('setitem', 'static_setitem') and
                 stmt.value.value.name in in_arg_names) or
                (isinstance(stmt, (ir.SetItem, ir.StaticSetItem)) and
                 stmt.target.name in in_arg_names))
            if stores_to_input:
                raise ValueError("Assignments to arrays passed to stencil kernels is not allowed.")

            # A getitem on an input array that is not standard indexed is
            # a relative access that has to be converted.
            is_relative_access = (
                is_expr_assign and
                stmt.value.op in ('static_getitem', 'getitem') and
                stmt.value.value.name in in_arg_names and
                stmt.value.value.name not in standard_indexed)

            if is_relative_access:
                arr_var = stmt.value.value
                index_list = stmt.value.index

                if ndims == 1:
                    # A 1D index is a single value, wrap it.
                    index_list = [index_list]
                elif (hasattr(index_list, 'name') and
                        index_list.name in tuple_table):
                    index_list = tuple_table[index_list.name]

                if index_offsets:
                    index_list = self._add_index_offsets(
                        index_list, list(index_offsets), rewritten,
                        scope, loc)

                if need_to_calc_kernel:
                    # The neighborhood can only be inferred when every
                    # index is a plain integer.
                    if (isinstance(index_list, ir.Var) or
                            not all(isinstance(entry, int)
                                    for entry in index_list)):
                        raise ValueError("Variable stencil index only "
                                         "possible with known neighborhood")
                    start_lengths = list(
                        map(min, start_lengths, index_list))
                    end_lengths = list(
                        map(max, end_lengths, index_list))
                    found_relative_index = True

                # Absolute index = loop index + relative index.
                index_vars = self._add_index_offsets(
                    parfor_vars, list(index_list), rewritten, scope, loc)

                if ndims == 1:
                    ind_var = index_vars[0]
                else:
                    # Pack the per-dimension indices into one tuple.
                    ind_var = ir.Var(
                        scope, mk_unique_var("$parfor_index_ind_var"), loc)
                    self.typemap[ind_var.name] = types.containers.UniTuple(
                        types.intp, ndims)
                    rewritten.append(
                        ir.Assign(ir.Expr.build_tuple(index_vars, loc),
                                  ind_var, loc))

                # The getitem yields a scalar when all indices are
                # integers and an array otherwise.
                index_types = [self.typemap[var.name]
                               for var in index_vars]
                arr_typ = self.typemap[arr_var.name]
                if all(typ == types.intp for typ in index_types):
                    getitem_return_typ = arr_typ.dtype
                else:
                    getitem_return_typ = arr_typ

                # Replace the access with a getitem on the new index.
                getitem_call = ir.Expr.getitem(arr_var, ind_var, loc)
                self.calltypes[getitem_call] = signature(
                    getitem_return_typ, arr_typ,
                    self.typemap[ind_var.name])
                stmt.value = getitem_call

            rewritten.append(stmt)
        block.body = rewritten

    if need_to_calc_kernel and not found_relative_index:
        raise ValueError("Stencil kernel with no accesses to "
                         "relatively indexed arrays.")

    return start_lengths, end_lengths
def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood,
                          standard_indexed):
    """
    Transforms the stencil kernel as specified by the user into one
    that includes each dimension's index variable as part of the getitem
    calls.  So, in effect array[-1] becomes array[index0-1].

    Args:
        kernel: kernel IR.  The bodies of ``kernel.blocks`` are rewritten
            in place; ``kernel.arg_names`` identifies the arrays passed to
            the kernel.
        index_names: names of the per-dimension index variables;
            ``index_names[d]`` is added to the offset of dimension ``d``.
        ndim: number of dimensions of the input array.
        neighborhood: ``None`` to infer the neighborhood from the constant
            indices found in the kernel, otherwise a sequence with one
            entry per dimension (it is returned unchanged).
        standard_indexed: names of kernel arguments whose getitems are
            left untouched.

    Returns:
        ``(neighborhood, relatively_indexed)``.  When inferred,
        ``neighborhood`` is a list of ``[min, max]`` offsets per dimension,
        each starting at ``[0, 0]``.  ``relatively_indexed`` is the set of
        argument names that were accessed with relative indexing.

    Raises:
        ValueError: if ``neighborhood`` does not have ``ndim`` entries, if
            the kernel stores into one of its arguments, if the
            neighborhood must be inferred but an index is not a constant
            integer (or tuple of them), if no relative access exists, or
            if an index does not have exactly ``ndim`` components.
    """
    const_dict = {}
    kernel_consts = []

    if config.DEBUG_ARRAY_OPT == 1:
        print("add_indices_to_kernel", ndim, neighborhood)
        ir_utils.dump_blocks(kernel.blocks)

    need_to_calc_kernel = neighborhood is None
    if not need_to_calc_kernel and len(neighborhood) != ndim:
        raise ValueError("%d dimensional neighborhood specified for %d "
                         "dimensional input array"
                         % (len(neighborhood), ndim))

    tuple_table = ir_utils.get_tuple_table(kernel.blocks)
    relatively_indexed = set()

    for block in kernel.blocks.values():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Const)):
                if config.DEBUG_ARRAY_OPT == 1:
                    print("remembering in const_dict", stmt.target.name,
                          stmt.value.value)
                # Remember consts for use later.
                const_dict[stmt.target.name] = stmt.value.value

            # Reject every form of store into an array passed to the
            # kernel, including the static variant of setitem.
            if ((isinstance(stmt, ir.Assign) and
                 isinstance(stmt.value, ir.Expr) and
                 stmt.value.op in ('setitem', 'static_setitem') and
                 stmt.value.value.name in kernel.arg_names) or
                    (isinstance(stmt, (ir.SetItem, ir.StaticSetItem)) and
                     stmt.target.name in kernel.arg_names)):
                raise ValueError("Assignments to arrays passed to stencil "
                                 "kernels is not allowed.")

            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Expr) and
                    stmt.value.op in ('getitem', 'static_getitem') and
                    stmt.value.value.name in kernel.arg_names and
                    stmt.value.value.name not in standard_indexed):
                # We found a getitem from the input array.
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    # allow static_getitem since rewrite passes are applied
                    stmt_index_var = stmt.value.index_var

                relatively_indexed.add(stmt.value.value.name)

                # Store the index used after looking up the variable in
                # the tuple table or the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts.append(
                            tuple_table[stmt_index_var.name])
                    elif stmt_index_var.name in const_dict:
                        kernel_consts.append(
                            const_dict[stmt_index_var.name])
                    else:
                        raise ValueError("Non-constant specified for "
                                         "stencil kernel index.")

                # NOTE(review): the operator is passed to ir.Expr.binop as
                # the string '+' below, while another definition of
                # add_indices_to_kernel in this source passes operator.add.
                # Confirm which form the targeted IR API expects.
                if ndim == 1:
                    # Single dimension always uses index_names[0].
                    # tmpvar will hold the real index and is computed by
                    # adding the relative offset in the getitem index to
                    # the current absolute location.
                    index_var = ir.Var(scope, index_names[0], loc)
                    tmpname = ir_utils.mk_unique_var("stencil_index")
                    tmpvar = ir.Var(scope, tmpname, loc)
                    acc_call = ir.Expr.binop('+', stmt_index_var,
                                             index_var, loc)
                    new_body.append(ir.Assign(acc_call, tmpvar, loc))
                    new_body.append(ir.Assign(
                        ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                        stmt.target, loc))
                else:
                    s_index_name = ir_utils.mk_unique_var("stencil_index")
                    s_index_var = ir.Var(scope, s_index_name, loc)
                    ind_stencils = []

                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        # Constant holding the position inside the tuple.
                        tmpname = ir_utils.mk_unique_var("const_index")
                        const_index_var = ir.Var(scope, tmpname, loc)
                        new_body.append(ir.Assign(ir.Const(dim, loc),
                                                  const_index_var, loc))

                        index_var = ir.Var(scope, index_names[dim], loc)

                        tmpname = ir_utils.mk_unique_var(
                            "ind_stencil_index")
                        tmpvar = ir.Var(scope, tmpname, loc)
                        ind_stencils.append(tmpvar)

                        # offset = index_tuple[dim]
                        getitemname = ir_utils.mk_unique_var("getitem")
                        getitemvar = ir.Var(scope, getitemname, loc)
                        getitemcall = ir.Expr.getitem(
                            stmt_index_var, const_index_var, loc)
                        new_body.append(
                            ir.Assign(getitemcall, getitemvar, loc))

                        # absolute = offset + index variable of this dim
                        acc_call = ir.Expr.binop('+', getitemvar,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))

                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(
                        ir.Assign(tuple_call, s_index_var, loc))
                    new_body.append(ir.Assign(
                        ir.Expr.getitem(stmt.value.value, s_index_var, loc),
                        stmt.target, loc))
            else:
                new_body.append(stmt)
        block.body = new_body

    if need_to_calc_kernel:
        # Find the extent of the kernel by tracking, per dimension, the
        # smallest and largest constant offset used (starting at 0).
        neighborhood = [[0, 0] for _ in range(ndim)]
        if not kernel_consts:
            raise ValueError("Stencil kernel with no accesses to "
                             "relatively indexed arrays.")

        for index in kernel_consts:
            if isinstance(index, (tuple, list)):
                offsets = index
            elif isinstance(index, int):
                offsets = [index]
            else:
                raise ValueError(
                    "Non-tuple or non-integer used as stencil index.")

            # Validate the rank before touching `neighborhood`: an index
            # with more components than ndim would otherwise fail with
            # an IndexError instead of this ValueError.
            if len(offsets) != ndim:
                raise ValueError(
                    "Stencil index does not match array dimensionality.")

            for bounds, te in zip(neighborhood, offsets):
                if isinstance(te, ir.Var) and te.name in const_dict:
                    te = const_dict[te.name]
                if not isinstance(te, int):
                    raise ValueError(
                        "Non-constant used as stencil index.")
                bounds[0] = min(bounds[0], te)
                bounds[1] = max(bounds[1], te)

    return (neighborhood, relatively_indexed)