def __setstate__(self, state):
    """Unpickle: restore attributes and the persistent hash digest.

    *state* is a ``(attribs, p_hash_digest)`` pair as produced by the
    matching ``__getstate__``.
    """
    attribs, p_hash_digest = state

    restored_fields = set()
    for field_name, field_value in six.iteritems(attribs):
        setattr(self, field_name, field_value)
        restored_fields.add(field_name)

    self.register_fields(restored_fields)

    if 0:
        # Debug toggle: recompute the persistent hash of the
        # reconstituted object and verify it matches the pickled digest.
        from loopy.tools import LoopyKeyBuilder
        LoopyKeyBuilder()(self)
        assert p_hash_digest == self._pytools_persistent_hash_digest
    else:
        # Trust the pickled digest instead of recomputing it.
        self._pytools_persistent_hash_digest = p_hash_digest

    # The cache manager and executor cache are not pickled; rebuild them.
    from loopy.kernel.tools import SetOperationCacheManager
    self.cache_manager = SetOperationCacheManager()
    self._kernel_executor_cache = {}
def __setstate__(self, state):
    """Unpickle: restore attributes from *state* and re-register them."""
    restored = set()
    for attr_name, attr_value in six.iteritems(state):
        setattr(self, attr_name, attr_value)
        restored.add(attr_name)
    self.register_fields(restored)

    # The cache manager is not pickled; rebuild it fresh.
    from loopy.kernel.tools import SetOperationCacheManager
    self.cache_manager = SetOperationCacheManager()
def __init__(self, domains, instructions, args=None, schedule=None,
        name="loopy_kernel", preambles=None,
        preamble_generators=None,
        assumptions=None,
        local_sizes=None,
        temporary_variables=None,
        iname_to_tag=None,
        substitutions=None,
        function_manglers=None,
        symbol_manglers=None,
        iname_slab_increments=None,
        loop_priority=frozenset(),
        silenced_warnings=None,
        applied_iname_rewrites=None,
        cache_manager=None,
        index_dtype=np.int32,
        options=None,
        state=kernel_state.INITIAL,
        target=None,

        # When kernels get intersected in slab decomposition,
        # their grid sizes shouldn't change. This provides
        # a way to forward sub-kernel grid size requests.
        get_grid_sizes_for_insn_ids=None):
    """Construct a kernel from *domains* and *instructions*.

    :arg domains: a list of isl sets; the first one supplies the parameter
        space used when *assumptions* is not given.
    :arg assumptions: *None* (assume nothing), an isl set string, or an
        isl parameter set.
    :raises RuntimeError: if two instructions carry the same id.
    :raises TypeError: if *index_dtype* is not a signed integer type.
    :raises ValueError: if *state* is not a recognized kernel state.
    """

    # {{{ process defaults

    # Mutable containers must not appear as defaults in the signature:
    # a single shared instance would persist (and could be mutated)
    # across all calls. Use None sentinels instead.
    if args is None:
        args = []
    if preambles is None:
        preambles = []
    if preamble_generators is None:
        preamble_generators = []
    if local_sizes is None:
        local_sizes = {}
    if temporary_variables is None:
        temporary_variables = {}
    if iname_to_tag is None:
        iname_to_tag = {}
    if substitutions is None:
        substitutions = {}
    if function_manglers is None:
        function_manglers = [
            default_function_mangler,
            single_arg_function_mangler,
        ]
    if symbol_manglers is None:
        symbol_manglers = []
    if iname_slab_increments is None:
        iname_slab_increments = {}
    if silenced_warnings is None:
        silenced_warnings = []
    if applied_iname_rewrites is None:
        applied_iname_rewrites = []

    if cache_manager is None:
        from loopy.kernel.tools import SetOperationCacheManager
        cache_manager = SetOperationCacheManager()

    # }}}

    # {{{ make instruction ids unique

    from loopy.kernel.creation import UniqueName

    insn_ids = set()
    for insn in instructions:
        if insn.id is not None and not isinstance(insn.id, UniqueName):
            if insn.id in insn_ids:
                raise RuntimeError("duplicate instruction id: %s" % insn.id)
            insn_ids.add(insn.id)

    insn_id_gen = UniqueNameGenerator(insn_ids)

    new_instructions = []

    for insn in instructions:
        if insn.id is None:
            new_instructions.append(
                    insn.copy(id=insn_id_gen("insn")))
        elif isinstance(insn.id, UniqueName):
            new_instructions.append(
                    insn.copy(id=insn_id_gen(insn.id.name)))
        else:
            new_instructions.append(insn)

    instructions = new_instructions
    del new_instructions

    # }}}

    # {{{ process assumptions

    if assumptions is None:
        # Default: the universe set over the parameters of the first domain.
        dom0_space = domains[0].get_space()
        assumptions_space = isl.Space.params_alloc(
                dom0_space.get_ctx(), dom0_space.dim(dim_type.param))
        for i in range(dom0_space.dim(dim_type.param)):
            assumptions_space = assumptions_space.set_dim_name(
                    dim_type.param, i,
                    dom0_space.get_dim_name(dim_type.param, i))
        assumptions = isl.BasicSet.universe(assumptions_space)

    elif isinstance(assumptions, str):
        assumptions_set_str = "[%s] -> { : %s}" \
                % (",".join(s for s in self.outer_params(domains)),
                assumptions)
        assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                assumptions_set_str)

    assert assumptions.is_params()

    # }}}

    from loopy.types import to_loopy_type
    index_dtype = to_loopy_type(index_dtype, target=target)
    if not index_dtype.is_integral():
        raise TypeError("index_dtype must be an integer")
    if np.iinfo(index_dtype.numpy_dtype).min >= 0:
        raise TypeError("index_dtype must be signed")

    if get_grid_sizes_for_insn_ids is not None:
        # overwrites method down below
        self.get_grid_sizes_for_insn_ids = get_grid_sizes_for_insn_ids

    if state not in [
            kernel_state.INITIAL,
            kernel_state.PREPROCESSED,
            kernel_state.SCHEDULED,
            ]:
        raise ValueError("invalid value for 'state'")

    assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
    assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

    ImmutableRecordWithoutPickling.__init__(self,
            domains=domains,
            instructions=instructions,
            args=args,
            schedule=schedule,
            name=name,
            preambles=preambles,
            preamble_generators=preamble_generators,
            assumptions=assumptions,
            iname_slab_increments=iname_slab_increments,
            loop_priority=loop_priority,
            silenced_warnings=silenced_warnings,
            temporary_variables=temporary_variables,
            local_sizes=local_sizes,
            iname_to_tag=iname_to_tag,
            substitutions=substitutions,
            cache_manager=cache_manager,
            applied_iname_rewrites=applied_iname_rewrites,
            function_manglers=function_manglers,
            symbol_manglers=symbol_manglers,
            index_dtype=index_dtype,
            options=options,
            state=state,
            target=target)

    self._kernel_executor_cache = {}
def __init__(self, domains, instructions, args=None, schedule=None,
        name="loopy_kernel", preambles=None,
        preamble_generators=None,
        assumptions=None,
        local_sizes=None,
        temporary_variables=None,
        iname_to_tags=None,
        substitutions=None,
        function_manglers=None,
        symbol_manglers=None,
        iname_slab_increments=None,
        loop_priority=frozenset(),
        silenced_warnings=None,
        applied_iname_rewrites=None,
        cache_manager=None,
        index_dtype=np.int32,
        options=None,
        state=KernelState.INITIAL,
        target=None,
        overridden_get_grid_sizes_for_insn_ids=None,
        _cached_written_variables=None):
    """
    :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get
        intersected in slab decomposition,
        their grid sizes shouldn't change. This provides
        a way to forward sub-kernel grid size requests.

    :raises TypeError: if *index_dtype* is not a signed integer type.
    :raises ValueError: if *state* is not a recognized kernel state.
    """

    # {{{ process constructor arguments

    # Mutable defaults are kept out of the signature (they would be
    # shared across calls) and initialized here instead.
    if args is None:
        args = []
    if preambles is None:
        preambles = []
    if preamble_generators is None:
        preamble_generators = []
    if local_sizes is None:
        local_sizes = {}
    if temporary_variables is None:
        temporary_variables = {}
    if iname_to_tags is None:
        iname_to_tags = {}
    if substitutions is None:
        substitutions = {}
    if function_manglers is None:
        function_manglers = [
            default_function_mangler,
            single_arg_function_mangler,
            ]
    if symbol_manglers is None:
        # BUGFIX: this branch previously reassigned *function_manglers*
        # (copy-paste error) and the signature carried a shared mutable
        # default ``symbol_manglers=[]``.
        symbol_manglers = []
    if iname_slab_increments is None:
        iname_slab_increments = {}
    if silenced_warnings is None:
        silenced_warnings = []
    if applied_iname_rewrites is None:
        applied_iname_rewrites = []

    if cache_manager is None:
        from loopy.kernel.tools import SetOperationCacheManager
        cache_manager = SetOperationCacheManager()

    # }}}

    # {{{ process assumptions

    if assumptions is None:
        # Default: the universe set over the parameters of the first domain.
        dom0_space = domains[0].get_space()
        assumptions_space = isl.Space.params_alloc(
                dom0_space.get_ctx(), dom0_space.dim(dim_type.param))
        for i in range(dom0_space.dim(dim_type.param)):
            assumptions_space = assumptions_space.set_dim_name(
                    dim_type.param, i,
                    dom0_space.get_dim_name(dim_type.param, i))
        assumptions = isl.BasicSet.universe(assumptions_space)

    elif isinstance(assumptions, str):
        assumptions_set_str = "[%s] -> { : %s}" \
                % (",".join(s for s in self.outer_params(domains)),
                assumptions)
        assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                assumptions_set_str)

    assert assumptions.is_params()

    # }}}

    from loopy.types import to_loopy_type
    index_dtype = to_loopy_type(index_dtype, target=target)
    if not index_dtype.is_integral():
        raise TypeError("index_dtype must be an integer")
    if np.iinfo(index_dtype.numpy_dtype).min >= 0:
        raise TypeError("index_dtype must be signed")

    if state not in [
            KernelState.INITIAL,
            KernelState.PREPROCESSED,
            KernelState.SCHEDULED,
            ]:
        raise ValueError("invalid value for 'state'")

    from collections import defaultdict
    assert not isinstance(iname_to_tags, defaultdict)

    for iname, tags in six.iteritems(iname_to_tags):
        # don't tolerate empty sets
        assert tags
        assert isinstance(tags, frozenset)

    assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
    assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

    ImmutableRecordWithoutPickling.__init__(self,
            domains=domains,
            instructions=instructions,
            args=args,
            schedule=schedule,
            name=name,
            preambles=preambles,
            preamble_generators=preamble_generators,
            assumptions=assumptions,
            iname_slab_increments=iname_slab_increments,
            loop_priority=loop_priority,
            silenced_warnings=silenced_warnings,
            temporary_variables=temporary_variables,
            local_sizes=local_sizes,
            iname_to_tags=iname_to_tags,
            substitutions=substitutions,
            cache_manager=cache_manager,
            applied_iname_rewrites=applied_iname_rewrites,
            function_manglers=function_manglers,
            symbol_manglers=symbol_manglers,
            index_dtype=index_dtype,
            options=options,
            state=state,
            target=target,
            overridden_get_grid_sizes_for_insn_ids=(
                overridden_get_grid_sizes_for_insn_ids),
            _cached_written_variables=_cached_written_variables)

    self._kernel_executor_cache = {}