def add_prefetch(kernel, var_name, sweep_inames=None, dim_arg_names=None,
        default_tag="l.auto", rule_name=None,
        temporary_name=None,
        temporary_scope=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False,
        fetch_outer_inames=None):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg var_name: A string that parses to either a (possibly tagged)
        variable name or a subscripted variable. If it is a subscript, its
        index is used as the single entry of *footprint_subscripts*, and
        *footprint_subscripts* must then not be given separately.
    :arg sweep_inames: A list of inames, or a comma-separated string of
        them. *None* (the default) is treated as an empty list.
    :arg dim_arg_names: List of names representing each fetch axis. Where
        given, these are used as the basis for the parameter names of the
        extracted substitution rule, and the list is also handed to
        :func:`precompute` as *precompute_inames*.
    :arg default_tag: Passed on unchanged to :func:`precompute`.
    :arg rule_name: Name of the substitution rule that is extracted for
        the accesses to *var_name*. If *None*, a unique name based on the
        variable name (and tag, if any) is generated.
    :arg temporary_name: Name of the temporary passed on to
        :func:`precompute`. If *None*, a unique name based on the variable
        name (and tag, if any) is generated.
    :arg temporary_scope: Passed on unchanged to :func:`precompute`.
    :arg temporary_is_local: Passed on unchanged to :func:`precompute`.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be
        specified directly by putting an index expression into *var_name*.
        Substitutions such as those occurring in dimension splits are
        recorded and also applied to these indices.
    :arg fetch_bounding_box: Passed on unchanged to :func:`precompute`.
    :arg fetch_outer_inames: The inames within which the fetch
        instruction is nested. If *None*, make an educated guess.

    :returns: the transformed kernel.
    :raises TypeError: if *var_name* contains a subscript and
        *footprint_subscripts* is given as well.
    :raises ValueError: if *var_name* parses to something that is neither
        a variable nor a subscript.

    This function combines :func:`extract_subst` and :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError(
                    "if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError(
                "var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    var_name = parsed_var_name.name
    if isinstance(parsed_var_name, TaggedVariable):
        tag = parsed_var_name.tag
    else:
        tag = None

    # }}}

    # c_name is the base for all generated identifiers; the tag (if any)
    # is appended so that names for differently tagged accesses differ.
    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        # Later assignments take precedence: positional default, then the
        # argument's own dimension name, then the caller-supplied name.
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        parameters.append(var_name_gen(based_on=based_on))

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
                tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if sweep_inames is None:
        sweep_inames = []
    elif isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel, rule_name, sweep_inames,
                    footprint_subscripts, arg)

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag, dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_scope=temporary_scope,
            temporary_is_local=temporary_is_local,
            precompute_outer_inames=fetch_outer_inames)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        # NOTE(review): the index is looked up on the kernel as it was
        # before precompute() but applied to the domains after it; this
        # presumably relies on precompute() keeping domain order -- confirm.
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:" + rule_name)
    else:
        return new_kernel
def add_prefetch(kernel, var_name, sweep_inames=None, dim_arg_names=None,

        # "None" is a valid value here, distinct from the default.
        default_tag=_not_provided,

        rule_name=None,
        temporary_name=None,
        temporary_scope=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False,
        fetch_outer_inames=None):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg var_name: A string, the name of the variable being prefetched.
        This may be a 'tagged variable name' (such as ``field$mytag``) to
        restrict the effect of the operation to only variable accesses
        with a matching tag.

        This may also be a subscripted version of the variable, in which
        case this access dictates the footprint that is prefetched,
        e.g. ``A[:,:]`` or ``field[i,j,:,:]``. In this case, accesses
        in the kernel are disregarded.

    :arg sweep_inames: A list of inames, or a comma-separated string of
        them. This routine 'sweeps' all accesses to *var_name* through all
        allowed values of the *sweep_inames* to generate a footprint. All
        values in this footprint are then stored in a temporary variable,
        and the original variable accesses replaced with accesses to this
        temporary. *None* (the default) is treated as an empty list.

    :arg dim_arg_names: List of names representing each fetch axis.
        These names show up as inames in the generated fetch code.

    :arg default_tag: The :ref:`implementation tag <iname-tags>` to
        assign to the inames driving the prefetch code. Use *None* to
        leave them undefined (to assign them later by hand). The current
        default will make them local axes and automatically split them to
        fit the work group size, but this default will disappear in favor
        of simply leaving them untagged in 2019.x. For 2018.x, a warning
        will be issued if no *default_tag* is specified.

    :arg rule_name: Name of the substitution rule that is extracted for
        the accesses to *var_name*. If *None*, a unique name based on the
        variable name (and tag, if any) is generated.

    :arg temporary_name: The name of the temporary to be used. If *None*,
        a unique name based on the variable name (and tag, if any) is
        generated.

    :arg temporary_scope: The :class:`temp_var_scope` to use for the
        temporary.

    :arg temporary_is_local: Deprecated, use *temporary_scope* instead.

    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be
        specified directly by putting an index expression into *var_name*.
        Substitutions such as those occurring in dimension splits are
        recorded and also applied to these indices.

    :arg fetch_bounding_box: To fit within :mod:`loopy`'s execution model,
        the 'footprint' of the fetch currently has to be a convex set.
        Sometimes this is not the case, e.g. for a high-order stencil::

              o
              o
            ooooo
              o
              o

        The footprint of the stencil when 'swept' over a base domain
        would look like this, and because of the 'missing corners',
        this set is not convex::

              oooooooooo
              oooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
              oooooooooo
              oooooooooo

        Passing ``fetch_bounding_box=True`` gives :mod:`loopy` permission
        to instead fetch the 'bounding box' of the footprint, i.e. this
        set in the stencil example::

            OOooooooooooOO
            OOooooooooooOO
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            OOooooooooooOO
            OOooooooooooOO

        Note the added corners marked with "``O``". The resulting
        footprint is guaranteed to be convex.

    :arg fetch_outer_inames: The inames within which the fetch
        instruction is nested. If *None*, make an educated guess.

    :returns: the transformed kernel.
    :raises TypeError: if *var_name* contains a subscript and
        *footprint_subscripts* is given as well.
    :raises ValueError: if *var_name* parses to something that is neither
        a variable nor a subscript.

    This function internally uses :func:`extract_subst` and
    :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError("if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError("var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    var_name = parsed_var_name.name
    if isinstance(parsed_var_name, TaggedVariable):
        tag = parsed_var_name.tag
    else:
        tag = None

    # }}}

    # c_name is the base for all generated identifiers; the tag (if any)
    # is appended so that names for differently tagged accesses differ.
    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        # Later assignments take precedence: positional default, then the
        # argument's own dimension name, then the caller-supplied name.
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        parameters.append(var_name_gen(based_on=based_on))

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
                tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if sweep_inames is None:
        sweep_inames = []
    elif isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel, rule_name, sweep_inames,
                    footprint_subscripts, arg)

    # Our _not_provided is actually a different object from the one in the
    # precompute module, but precompute actually uses that to adjust its
    # warning message.

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag, dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_scope=temporary_scope,
            temporary_is_local=temporary_is_local,
            precompute_outer_inames=fetch_outer_inames)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        # NOTE(review): the index is looked up on the kernel as it was
        # before precompute() but applied to the domains after it; this
        # presumably relies on precompute() keeping domain order -- confirm.
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:"+rule_name)
    else:
        return new_kernel
def add_prefetch(kernel, var_name, sweep_inames=None, dim_arg_names=None,
        default_tag="l.auto", rule_name=None,
        temporary_name=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg var_name: A string that parses to either a (possibly tagged)
        variable name or a subscripted variable. If it is a subscript, its
        index is used as the single entry of *footprint_subscripts*, and
        *footprint_subscripts* must then not be given separately.
    :arg sweep_inames: A list of inames, or a comma-separated string of
        them. *None* (the default) is treated as an empty list.
    :arg dim_arg_names: List of names representing each fetch axis. Where
        given, these are used as the basis for the parameter names of the
        extracted substitution rule, and the list is also handed to
        :func:`precompute` as *precompute_inames*.
    :arg default_tag: Passed on unchanged to :func:`precompute`.
    :arg rule_name: Name of the substitution rule that is extracted for
        the accesses to *var_name*. If *None*, a unique name based on the
        variable name (and tag, if any) is generated.
    :arg temporary_name: Name of the temporary passed on to
        :func:`precompute`. If *None*, a unique name based on the variable
        name (and tag, if any) is generated.
    :arg temporary_is_local: Passed on unchanged to :func:`precompute`.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be
        specified directly by putting an index expression into *var_name*.
        Substitutions such as those occurring in dimension splits are
        recorded and also applied to these indices.
    :arg fetch_bounding_box: Passed on unchanged to :func:`precompute`.

    :returns: the transformed kernel.
    :raises TypeError: if *var_name* contains a subscript and
        *footprint_subscripts* is given as well.
    :raises ValueError: if *var_name* parses to something that is neither
        a variable nor a subscript.

    This function combines :func:`extract_subst` and :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError("if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError("var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    var_name = parsed_var_name.name
    if isinstance(parsed_var_name, TaggedVariable):
        tag = parsed_var_name.tag
    else:
        tag = None

    # }}}

    # c_name is the base for all generated identifiers; the tag (if any)
    # is appended so that names for differently tagged accesses differ.
    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        # Later assignments take precedence: positional default, then the
        # argument's own dimension name, then the caller-supplied name.
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        parameters.append(var_name_gen(based_on=based_on))

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
                tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if sweep_inames is None:
        sweep_inames = []
    elif isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel, rule_name, sweep_inames,
                    footprint_subscripts, arg)

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag, dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_is_local=temporary_is_local)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        # NOTE(review): the index is looked up on the kernel as it was
        # before precompute() but applied to the domains after it; this
        # presumably relies on precompute() keeping domain order -- confirm.
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:"+rule_name)
    else:
        return new_kernel