def make_kernels(self, seq_dependencies):
    """Create one loopy kernel for each entry of ``self.kernels``.

    For every subprogram the kernel data is assembled in two steps: first
    one ``lp.GlobalArg``/``lp.ValueArg`` per declared argument, then one
    ``lp.TemporaryVariable`` per remaining known name. The kernel is built
    with ``lp.make_kernel`` and then passed through ``fuse_loop_domains``
    and ``lp.fold_constants``.

    :arg seq_dependencies: forwarded unchanged to ``lp.make_kernel``.
    :returns: a list with one processed kernel per subprogram, in the
        iteration order of ``self.kernels``.
    """
    kernels = []

    for subprogram in self.kernels:
        # {{{ arguments

        data = []
        for name in subprogram.arg_names:
            if subprogram.dim_map.get(name) is None:
                # no entry in dim_map: described by a ValueArg
                arg = lp.ValueArg(name, dtype=subprogram.get_type(name))
            else:
                # storage order comes from default_order="F" passed below
                arg = lp.GlobalArg(
                    name,
                    dtype=subprogram.get_type(name),
                    shape=subprogram.get_loopy_shape(name),
                )
            data.append(arg)

        # }}}

        # {{{ temporaries

        # every known name that is neither an argument nor an iname
        candidate_names = (
            subprogram.known_names()
            - set(subprogram.arg_names)
            - subprogram.all_inames()
        )
        for name in candidate_names:
            dtype = subprogram.get_type(name, none_ok=True)
            # names without a type are skipped when implicit_types is None
            if subprogram.implicit_types is not None or dtype is not None:
                data.append(
                    lp.TemporaryVariable(
                        name,
                        dtype=dtype,
                        shape=subprogram.get_loopy_shape(name),
                    )
                )

        # }}}

        kernel = lp.make_kernel(
            subprogram.index_sets,
            subprogram.instructions,
            data,
            name=subprogram.subprogram_name,
            default_order="F",
            index_dtype=self.index_dtype,
            target=self.target,
            seq_dependencies=seq_dependencies,
        )

        # imported locally, after kernel creation, as in the original code
        from loopy.loop import fuse_loop_domains

        fused = fuse_loop_domains(kernel)
        kernels.append(lp.fold_constants(fused))

    return kernels
def make_kernels(self):
    """Create one loopy kernel for each entry of ``self.kernels``.

    For every subprogram the kernel data is assembled from its declared
    arguments (``lp.GlobalArg`` when ``dim_map`` has an entry for the name,
    ``lp.ValueArg`` otherwise) followed by an ``lp.TemporaryVariable`` for
    each remaining known name. The kernel is built with ``lp.make_kernel``
    and then passed through ``fuse_loop_domains`` and ``lp.fold_constants``.

    :returns: a list with one processed kernel per subprogram, in the
        iteration order of ``self.kernels``.
    """
    built = []

    for unit in self.kernels:
        # {{{ arguments

        entries = []
        for arg in unit.arg_names:
            has_dims = unit.dim_map.get(arg) is not None
            if has_dims:
                # storage order comes from default_order="F" passed below
                entries.append(
                    lp.GlobalArg(
                        arg,
                        dtype=unit.get_type(arg),
                        shape=unit.get_loopy_shape(arg),
                    )
                )
                continue

            entries.append(lp.ValueArg(arg, dtype=unit.get_type(arg)))

        # }}}

        # {{{ temporaries

        # every known name that is neither an argument nor an iname
        leftover = unit.known_names() - set(unit.arg_names) - unit.all_inames()
        for var in leftover:
            var_dtype = unit.get_type(var, none_ok=True)
            # names without a type are skipped when implicit_types is None
            if unit.implicit_types is not None or var_dtype is not None:
                entries.append(
                    lp.TemporaryVariable(
                        var,
                        dtype=var_dtype,
                        shape=unit.get_loopy_shape(var),
                    )
                )

        # }}}

        kernel = lp.make_kernel(
            unit.index_sets,
            unit.instructions,
            entries,
            name=unit.subprogram_name,
            default_order="F",
            index_dtype=self.index_dtype,
            target=self.target,
        )

        # imported locally, after kernel creation, as in the original code
        from loopy.loop import fuse_loop_domains

        kernel = fuse_loop_domains(kernel)
        built.append(lp.fold_constants(kernel))

    return built
def make_kernels(self, seq_dependencies):
    """Create one loopy function for each entry of ``self.kernels``.

    For every subprogram the kernel data is assembled from its declared
    arguments (``lp.GlobalArg`` when ``dim_map`` has an entry for the name,
    ``lp.ValueArg`` otherwise) followed by an ``lp.TemporaryVariable`` for
    each remaining known name. A temporary whose name has a ``data_map``
    entry is created read-only, with ``lp.AddressSpace.PRIVATE`` and that
    data as its initializer. The result of ``lp.make_function`` is then
    passed through ``merge_loop_domains`` and ``lp.fold_constants``.

    :arg seq_dependencies: forwarded unchanged to ``lp.make_function``.
    :returns: a list with one processed kernel per subprogram, in the
        iteration order of ``self.kernels``.
    :raises NotImplementedError: if an argument has a ``data_map`` entry.
    """
    kernels = []

    for subprogram in self.kernels:
        # {{{ arguments

        data = []
        for name in subprogram.arg_names:
            dims = subprogram.dim_map.get(name)

            # initial data is only handled for temporaries, not arguments
            if subprogram.data_map.get(name) is not None:
                raise NotImplementedError("initializer for argument %s" % name)

            if dims is None:
                arg = lp.ValueArg(name, dtype=subprogram.get_type(name))
            else:
                # storage order comes from default_order="F" passed below
                arg = lp.GlobalArg(
                    name,
                    dtype=subprogram.get_type(name),
                    shape=subprogram.get_loopy_shape(name),
                )
            data.append(arg)

        # }}}

        # {{{ temporaries

        # every known name that is neither an argument nor an iname
        candidate_names = (
            subprogram.known_names()
            - set(subprogram.arg_names)
            - subprogram.all_inames()
        )
        for name in candidate_names:
            dtype = subprogram.get_type(name, none_ok=True)
            # names without a type are skipped when implicit_types is None
            if not (subprogram.implicit_types is not None or dtype is not None):
                continue

            extra = {}
            if subprogram.data_map.get(name) is not None:
                # names with a data_map entry carry that data as initializer
                extra.update(
                    read_only=True,
                    address_space=lp.AddressSpace.PRIVATE,
                    initializer=np.array(subprogram.data_map[name], dtype=dtype),
                )

            data.append(
                lp.TemporaryVariable(
                    name,
                    dtype=dtype,
                    shape=subprogram.get_loopy_shape(name),
                    **extra,
                )
            )

        # }}}

        kernel = lp.make_function(
            subprogram.index_sets,
            subprogram.instructions,
            data,
            name=subprogram.subprogram_name,
            default_order="F",
            index_dtype=self.index_dtype,
            target=self.target,
            seq_dependencies=seq_dependencies,
        )

        # imported locally, after kernel creation, as in the original code
        from loopy.loop import merge_loop_domains

        merged = merge_loop_domains(kernel)
        kernels.append(lp.fold_constants(merged))

    return kernels