def __init__(self, **kwargs):
    """Normalize ``name`` and ``dtype`` and store all keywords on the record.

    ``name`` is interned, ``target`` and ``dtype`` are removed from *kwargs*
    and ``dtype`` is converted with :func:`loopy.types.to_loopy_type`.
    ``for_atomic`` (default ``False``) is read but left in *kwargs*, so it
    is still forwarded to the record constructor.
    """
    kwargs["name"] = intern(kwargs.pop("name"))

    target = kwargs.pop("target", None)
    raw_dtype = kwargs.pop("dtype", None)
    atomic = kwargs.get('for_atomic', False)

    from loopy.types import to_loopy_type
    converted = to_loopy_type(
            raw_dtype, allow_auto=True, allow_none=True,
            for_atomic=atomic, target=target)

    import loopy as lp
    if converted is lp.auto:
        # 'auto' is still accepted, but stored as None after a warning.
        warn("Argument/temporary data type for '%s' should be None if "
                "unspecified, not auto. This usage will be disallowed in 2018."
                % kwargs["name"],
                DeprecationWarning, stacklevel=2)
        converted = None

    kwargs["dtype"] = converted

    ImmutableRecord.__init__(self, **kwargs)
def __call__(self, preamble_info):
    """Yield the preamble for the indirect lookup function, if it is used.

    Looks for a seen function named ``self.func_name`` whose argument dtypes
    equal ``self.func_arg_dtypes``. If one exists, yields a single
    ``(description, source)`` tuple containing the declaration of the
    ``lookup`` array followed by the C source of the function. Otherwise
    nothing is yielded.
    """
    from loopy.kernel.data import temp_var_scope as scopes

    # find a function matching our name
    func_match = None
    for seen in preamble_info.seen_functions:
        if seen.name == self.func_name:
            func_match = seen
            break

    desc = 'custom_funcs_indirect'
    if func_match is None:
        return

    from loopy.types import to_loopy_type
    # check types
    expected_dtypes = tuple(to_loopy_type(x) for x in self.func_arg_dtypes)
    if not (expected_dtypes == func_match.arg_dtypes):
        return

    # the lookup table is emitted as a read-only, initialized temporary
    var = lp.TemporaryVariable(
        'lookup', initializer=self.arr,
        dtype=self.arr.dtype, shape=self.arr.shape,
        scope=scopes.GLOBAL, read_only=True)

    # C source of the function (doubled braces are str.format escapes)
    template = (
        " int {name}(int start, int end, int match) {{"
        " int result = start;"
        " for (int i = start + 1; i < end; ++i) {{"
        " if (lookup[i] == match) result = i;"
        " }}"
        " return result;"
        " }} ")
    code = template.format(name=self.func_name)

    # generate temporary variable code
    from cgen import Initializer
    from loopy.target.c import generate_array_literal
    codegen_state = preamble_info.codegen_state.copy(
        is_generating_device_code=True)
    kernel = preamble_info.kernel
    ast_builder = codegen_state.ast_builder
    target = kernel.target

    # exactly one declaration is expected for the variable
    decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype)
    temp_decl = ast_builder.get_temporary_decl(
        codegen_state, None, var, decl_info)
    decl = ast_builder.wrap_global_constant(temp_decl)
    if var.initializer is not None:
        literal = generate_array_literal(codegen_state, var, var.initializer)
        decl = Initializer(decl, literal)

    # return generated code
    yield (desc, '\n'.join([str(decl), code]))
def __call__(self, preamble_info):
    """Yield the preamble for the indirect lookup function, if it is used.

    Looks for a seen function named ``self.func_name`` whose argument dtypes
    equal ``self.func_arg_dtypes``. If one exists, yields a single
    ``(description, source)`` tuple containing the declaration of the
    ``lookup`` array followed by the C source of the function. Otherwise
    nothing is yielded.
    """
    # find a function matching our name
    func_match = None
    for seen in preamble_info.seen_functions:
        if seen.name == self.func_name:
            func_match = seen
            break

    desc = 'custom_funcs_indirect'
    if func_match is None:
        return

    from loopy.types import to_loopy_type
    # check types
    expected_dtypes = tuple(to_loopy_type(x) for x in self.func_arg_dtypes)
    if not (expected_dtypes == func_match.arg_dtypes):
        return

    # the lookup table is emitted as a read-only, initialized temporary
    var = lp.TemporaryVariable(
        'lookup', initializer=self.arr,
        dtype=self.arr.dtype, shape=self.arr.shape,
        address_space=lp.AddressSpace.GLOBAL, read_only=True)

    # C source of the function (doubled braces are str.format escapes)
    template = (
        " int {name}(int start, int end, int match) {{"
        " int result = start;"
        " for (int i = start + 1; i < end; ++i) {{"
        " if (lookup[i] == match) result = i;"
        " }}"
        " return result;"
        " }} ")
    code = template.format(name=self.func_name)

    # generate temporary variable code
    from cgen import Initializer
    from loopy.target.c import generate_array_literal
    codegen_state = preamble_info.codegen_state.copy(
        is_generating_device_code=True)
    kernel = preamble_info.kernel
    ast_builder = codegen_state.ast_builder
    target = kernel.target

    # exactly one declaration is expected for the variable
    decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype)
    temp_decl = ast_builder.get_temporary_decl(
        codegen_state, None, var, decl_info)
    decl = ast_builder.wrap_global_constant(temp_decl)
    if var.initializer is not None:
        literal = generate_array_literal(codegen_state, var, var.initializer)
        decl = Initializer(decl, literal)

    # return generated code
    yield (desc, '\n'.join([str(decl), code]))
def _check_atomic_data(insn):
    """Make the variable written by *insn* atomic and return its name.

    The first assignee of *insn* is looked up in ``data`` by name and, if
    absent there, in ``temps``. An entry whose dtype is not an
    :class:`AtomicType` is replaced by an atomic copy.
    """
    # get the kernel arg written by this insn
    written = insn.assignee_var_names()[0]
    ind = next((pos for pos, entry in enumerate(data)
                if entry.name == written), None)

    if ind is None:
        # not a kernel argument, so it has to be a temporary
        assert written in temps, (
            'Cannot find written atomic variable: {}'.format(written))
        if not isinstance(temps[written].dtype, AtomicType):
            atomic_dtype = to_loopy_type(
                temps[written].dtype, for_atomic=True)
            temps[written] = temps[written].copy(dtype=atomic_dtype)
    elif not isinstance(data[ind].dtype, AtomicType):
        # make sure the dtype is atomic, if not update it
        assert data[ind].dtype is not None, (
            "Change of dtype to atomic doesn't work if base dype is not"
            " populated")
        data[ind] = data[ind].copy(for_atomic=True)

    return written
def test_target_record():
    """Check that ``__getstate__`` asserts on kernel data lacking a target.

    The same target-less argument is wrapped in several container shapes,
    and a loopy dtype is also used as a dictionary key. Each resulting
    record must raise :class:`AssertionError` from ``__getstate__``.
    """
    import numpy as np
    from loopy.types import to_loopy_type

    # make bad argument (i.e, one without the target set)
    bad = lp.GlobalArg('bad', dtype=np.int32, shape=(1, ), order='C')
    # numpy dtype as dictionary key
    dtype = to_loopy_type(np.int32)

    cases = (
        [bad],            # list
        {'a': bad},       # dictionary
        {'a': [bad]},     # dictionary of lists
        bad,              # plain value
        {dtype: 'bad'},   # dtype used as a key
    )
    for kernel_data in cases:
        record = TargetCheckingRecord(kernel_data=kernel_data)
        with assert_raises(AssertionError):
            record.__getstate__()
def __call__(self, kernel, name, arg_dtypes):
    """
    A function that will return a :class:`loopy.kernel.data.CallMangleInfo`
    to interface with the calling :class:`loopy.LoopKernel`

    :arg kernel: the calling kernel; its ``target`` is only used when
        *arg_dtypes* is empty.
    :arg name: name of the function being mangled.
    :arg arg_dtypes: dtypes of the arguments at the call site.
    :returns: *None* if *name* is not ``self.func_name``, otherwise the
        :class:`loopy.kernel.data.CallMangleInfo` for this function.
    :raises Exception: if the number or the dtypes of the arguments do not
        match ``self.func_arg_dtypes``.
    """
    if name != self.func_name:
        return None

    from loopy.types import to_loopy_type
    from loopy.kernel.data import CallMangleInfo

    def __compare(d1, d2):
        # compare dtypes ignoring atomic
        return to_loopy_type(d1, for_atomic=True) == \
            to_loopy_type(d2, for_atomic=True)

    # check the arity against the *expected* signature; comparing
    # arg_dtypes with itself could never fail and let zip() below silently
    # truncate to the shorter sequence
    if len(self.func_arg_dtypes) != len(arg_dtypes):
        raise Exception(
            'Unexpected number of arguments provided to mangler '
            '{}, expected {}, got {}'.format(self.func_name,
                                             len(self.func_arg_dtypes),
                                             len(arg_dtypes)))

    # check types
    for i, (d1, d2) in enumerate(zip(self.func_arg_dtypes, arg_dtypes)):
        if not __compare(d1, d2):
            raise Exception(
                'Argument at index {} for mangler {} does not '
                'match expected dtype.  Expected {}, got {}'.format(
                    i, self.func_name, str(d1), str(d2)))

    # get target for creation; a zero-argument function has no argument to
    # take it from, so fall back to the kernel's target
    target = arg_dtypes[0].target if arg_dtypes else kernel.target
    return CallMangleInfo(
        target_name=self.func_name,
        result_dtypes=tuple(
            to_loopy_type(x, target=target)
            for x in self.func_result_dtypes),
        arg_dtypes=arg_dtypes)
def _add_dtypes(knl, dtype_dict):
    """Apply the dtypes in *dtype_dict* to the data of *knl*.

    :arg knl: the kernel whose ``args`` and ``temporary_variables`` are
        updated (the kernel itself is not modified).
    :arg dtype_dict: mapping from variable name to the requested dtype;
        it is copied, not modified.
    :returns: a tuple ``(remaining, new_args, new_temp_vars)`` where
        *remaining* holds the entries of *dtype_dict* that matched neither
        an argument nor a temporary.
    :raises RuntimeError: if a variable already has a different dtype.
    """
    from loopy.types import to_loopy_type
    import loopy as lp

    remaining = dtype_dict.copy()

    new_args = []
    for arg in knl.args:
        requested = remaining.pop(arg.name, None)
        if requested is None:
            new_args.append(arg)
            continue

        requested = to_loopy_type(requested, target=knl.target)
        if arg.dtype is not None and arg.dtype != requested:
            raise RuntimeError(
                    "argument '%s' already has a different dtype "
                    "(existing: %s, new: %s)"
                    % (arg.name, arg.dtype, requested))
        new_args.append(arg.copy(dtype=requested))

    new_temp_vars = knl.temporary_variables.copy()
    for tv_name in knl.temporary_variables:
        requested = remaining.pop(tv_name, None)
        if requested is None:
            continue

        requested = np.dtype(requested)
        tv = new_temp_vars[tv_name]
        dtype_is_set = tv.dtype is not None and tv.dtype is not lp.auto
        if dtype_is_set and tv.dtype != requested:
            raise RuntimeError(
                    "temporary variable '%s' already has a different dtype "
                    "(existing: %s, new: %s)"
                    % (tv_name, tv.dtype, requested))
        new_temp_vars[tv_name] = tv.copy(dtype=requested)

    return remaining, new_args, new_temp_vars
def __call__(self, kernel, name, arg_dtypes):
    """
    A function that will return a :class:`loopy.kernel.data.CallMangleInfo`
    to interface with the calling :class:`loopy.LoopKernel`

    :arg kernel: the calling kernel; its ``target`` is only used when
        *arg_dtypes* is empty.
    :arg name: name of the function being mangled.
    :arg arg_dtypes: dtypes of the arguments at the call site.
    :returns: *None* if *name* is not ``self.func_name``, otherwise the
        :class:`loopy.kernel.data.CallMangleInfo` for this function.
    :raises Exception: if the number or the dtypes of the arguments do not
        match ``self.func_arg_dtypes``.
    """
    if name != self.func_name:
        return None

    from loopy.types import to_loopy_type
    from loopy.kernel.data import CallMangleInfo

    def __compare(d1, d2):
        # compare dtypes ignoring atomic
        return to_loopy_type(d1, for_atomic=True) == \
            to_loopy_type(d2, for_atomic=True)

    # the arity has to be checked against the expected signature: the
    # previous self-comparison of arg_dtypes was always False, so a wrong
    # argument count went unnoticed (zip() truncates silently)
    if len(self.func_arg_dtypes) != len(arg_dtypes):
        raise Exception('Unexpected number of arguments provided to mangler '
                        '{}, expected {}, got {}'.format(
                            self.func_name, len(self.func_arg_dtypes),
                            len(arg_dtypes)))

    # check types
    for i, (d1, d2) in enumerate(zip(self.func_arg_dtypes, arg_dtypes)):
        if not __compare(d1, d2):
            raise Exception('Argument at index {} for mangler {} does not '
                            'match expected dtype.  Expected {}, got {}'.
                            format(i, self.func_name, str(d1), str(d2)))

    # get target for creation; without arguments there is nothing to read
    # it from, so use the kernel's target instead
    target = arg_dtypes[0].target if arg_dtypes else kernel.target
    return CallMangleInfo(
        target_name=self.func_name,
        result_dtypes=tuple(to_loopy_type(x, target=target) for x in
                            self.func_result_dtypes),
        arg_dtypes=arg_dtypes)
def __init__(self, name, dtype=None, shape=(), address_space=None,
        dim_tags=None, offset=0, dim_names=None, strides=None, order=None,
        base_indices=None, storage_shape=None,
        base_storage=None, initializer=None, read_only=False,
        _base_storage_access_may_be_aliasing=False, **kwargs):
    """
    :arg dtype: :class:`loopy.auto` or a :class:`numpy.dtype`
    :arg shape: :class:`loopy.auto` or a shape tuple
    :arg base_indices: :class:`loopy.auto` or a tuple of base indices
    :arg address_space: defaults to :class:`loopy.auto` if not given.
        The deprecated keyword ``scope`` is accepted as an alias.
    :arg initializer: *None* or a :class:`numpy.ndarray`. If given, the
        variable must be ``read_only``, *offset* must be 0 and
        *base_storage* must not be given.

    :raises ValueError: if both ``scope`` and *address_space* are passed.
    :raises LoopyError: if the arguments are inconsistent (see the
        individual messages).
    """

    # 'scope' is the deprecated spelling of 'address_space'
    scope = kwargs.pop("scope", None)
    if scope is not None:
        warn("Passing 'scope' is deprecated. Use 'address_space' instead.",
                DeprecationWarning, stacklevel=2)

        if address_space is not None:
            raise ValueError("only one of 'scope' and 'address_space' "
                    "may be specified")
        else:
            address_space = scope

    del scope

    # (a second 'address_space is None' check used to follow here; it
    # could never trigger after this assignment and has been removed)
    if address_space is None:
        address_space = auto

    if initializer is None:
        pass
    elif isinstance(initializer, np.ndarray):
        if offset != 0:
            raise LoopyError(
                    "temporary variable '%s': "
                    "offset must be 0 if initializer specified"
                    % name)

        from loopy.types import NumpyType, to_loopy_type
        if dtype is auto or dtype is None:
            # take the dtype from the initializer if none was requested
            dtype = NumpyType(initializer.dtype)
        elif to_loopy_type(dtype) != to_loopy_type(initializer.dtype):
            raise LoopyError(
                    "temporary variable '%s': "
                    "dtype of initializer does not match "
                    "dtype of array."
                    % name)

        if shape is auto:
            shape = initializer.shape
    else:
        raise LoopyError(
                "temporary variable '%s': "
                "initializer must be None or a numpy array"
                % name)

    if order is None:
        order = "C"

    # An 'auto' shape has no length yet, so default base indices can only
    # be derived once the shape is known.
    if base_indices is None and shape is not auto:
        base_indices = (0,) * len(shape)

    if not read_only and initializer is not None:
        raise LoopyError(
                "temporary variable '%s': "
                "read-write variables with initializer "
                "are not currently supported "
                "(did you mean to set read_only=True?)"
                % name)

    if base_storage is not None and initializer is not None:
        raise LoopyError(
                "temporary variable '%s': "
                "base_storage and initializer are "
                "mutually exclusive"
                % name)

    if base_storage is None and _base_storage_access_may_be_aliasing:
        raise LoopyError(
                "temporary variable '%s': "
                "_base_storage_access_may_be_aliasing option, but no "
                "base_storage given!"
                % name)

    ArrayBase.__init__(self, name=intern(name),
            dtype=dtype, shape=shape, strides=strides,
            dim_tags=dim_tags, offset=offset, dim_names=dim_names,
            order=order,
            base_indices=base_indices,
            address_space=address_space,
            storage_shape=storage_shape,
            base_storage=base_storage,
            initializer=initializer,
            read_only=read_only,
            _base_storage_access_may_be_aliasing=(
                _base_storage_access_may_be_aliasing),
            **kwargs)
def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
        dim_names=None, strides=None, order=None, for_atomic=False,
        target=None, alignment=None,
        **kwargs):
    """
    All of the following (except *name*) are optional.
    Specify either strides or shape.

    :arg name: When passed to :class:`loopy.make_kernel`, this may contain
        multiple names separated by commas, in which case multiple
        arguments, each with identical properties, are created for each
        name.

    :arg shape: May be any of the things specified under :attr:`shape`,
        or a string which can be parsed into the previous form.

    :arg dim_tags: A comma-separated list of tags as understood by
        :func:`parse_array_dim_tag`.

    :arg strides: May be one of the following:

        * None

        * :class:`loopy.auto`. The strides will be determined by *order*
          and the access footprint.

        * a tuple like :attr:`numpy.ndarray.shape`.

          Each entry of the tuple is also allowed to be a :mod:`pymbolic`
          expression involving kernel parameters, or a string that can be
          parsed to such an expression.

        * A string which can be parsed into the previous form.

    :arg order: "F" or "C" for C (row major) or Fortran
        (column major). Defaults to the *default_order* argument
        passed to :func:`loopy.make_kernel`.
    :arg for_atomic:
        Whether the array is declared for atomic access, and, if necessary,
        using atomic-capable data types.
    :arg offset: (See :attr:`offset`)
    :arg alignment: memory alignment in bytes

    :raises TypeError: for keywords not in ``allowed_extra_kwargs`` or if
        both *strides* and *dim_tags* are given.
    :raises LoopyError: for inconsistent dimension information.
    """

    for kwarg_name in kwargs:
        if kwarg_name not in self.allowed_extra_kwargs:
            raise TypeError("invalid kwarg: %s" % kwarg_name)

    import loopy as lp

    from loopy.types import to_loopy_type
    dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True,
            for_atomic=for_atomic, target=target)

    if dtype is lp.auto:
        from warnings import warn
        warn("Argument/temporary data type for '%s' should be None if "
                "unspecified, not auto. This usage will be disallowed in 2018."
                % name,
                DeprecationWarning, stacklevel=2)

        dtype = None

    strides_known = strides is not None and strides is not lp.auto
    shape_known = shape is not None and shape is not lp.auto

    if strides_known:
        strides = _parse_shape_or_strides(strides)

    if shape_known:
        shape = _parse_shape_or_strides(shape)

    # {{{ check dim_names

    if dim_names is not None:
        if len(dim_names) != len(set(dim_names)):
            raise LoopyError("dim_names are not unique")

        for n in dim_names:
            if not isinstance(n, str):
                raise LoopyError("found non-string '%s' in dim_names"
                        % type(n).__name__)

    # }}}

    # {{{ convert strides to dim_tags (Note: strides override order)

    if dim_tags is not None and strides_known:
        raise TypeError("may not specify both strides and dim_tags")

    if dim_tags is None and strides_known:
        dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
        strides = None

    # }}}

    if dim_tags is not None:
        dim_tags = parse_array_dim_tags(dim_tags,
                n_axes=(len(shape) if shape_known else None),
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)

    # {{{ determine number of user axes

    num_user_axes = None
    if shape_known:
        num_user_axes = len(shape)
    for dim_iterable in [dim_tags, dim_names]:
        if dim_iterable is not None:
            new_num_user_axes = len(dim_iterable)

            if num_user_axes is None:
                num_user_axes = new_num_user_axes
            else:
                if new_num_user_axes != num_user_axes:
                    raise LoopyError("contradictory values for number of "
                            "dimensions of array '%s' from shape, strides, "
                            "dim_tags, or dim_names"
                            % name)

            del new_num_user_axes

    # }}}

    # {{{ convert order to dim_tags

    if order is None and self.max_target_axes > 1:
        # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
        # if no order is specified. Plus they don't care that much.
        order = "C"

    if dim_tags is None and num_user_axes is not None and order is not None:
        dim_tags = parse_array_dim_tags(num_user_axes*[order],
                n_axes=num_user_axes,
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)
        order = None

    # }}}

    if dim_tags is not None:
        # {{{ find number of target axes

        target_axes = set()
        for dim_tag in dim_tags:
            if isinstance(dim_tag, _StrideArrayDimTagBase):
                target_axes.add(dim_tag.target_axis)

        # NOTE: the messages below use the *name* argument; attributes
        # such as self.name are only assigned by the record constructor
        # at the very end of this method.
        if target_axes != set(range(len(target_axes))):
            raise LoopyError("target axes for variable '%s' are non-"
                    "contiguous" % name)

        num_target_axes = len(target_axes)
        del target_axes

        # }}}

        if not (self.min_target_axes <= num_target_axes
                <= self.max_target_axes):
            raise LoopyError("%s only supports between %d and %d target axes "
                    "('%s' has %d)" % (type(self).__name__,
                        self.min_target_axes, self.max_target_axes,
                        name, num_target_axes))

        new_dim_tags = convert_computed_to_fixed_dim_tags(
                name, num_user_axes, num_target_axes,
                shape, dim_tags)

        if new_dim_tags is not None:
            # successfully normalized
            dim_tags = new_dim_tags
            del new_dim_tags

    if dim_tags is not None:
        # for hashability
        dim_tags = tuple(dim_tags)
        order = None

    if strides is not None:
        # Preserve strides if we weren't able to process them yet.
        # That only happens if they're set to loopy.auto (and 'guessed'
        # in loopy.kernel.creation).

        kwargs["strides"] = strides

    if dim_names is not None and not isinstance(dim_names, tuple):
        from warnings import warn
        warn("dim_names is not a tuple when calling ArrayBase constructor",
                DeprecationWarning, stacklevel=2)

    ImmutableRecord.__init__(self,
            name=name,
            dtype=dtype,
            shape=shape,
            dim_tags=dim_tags,
            offset=offset,
            dim_names=dim_names,
            order=order,
            alignment=alignment,
            for_atomic=for_atomic,
            **kwargs)
def __compare(d1, d2):
    """Return whether *d1* and *d2* are the same dtype, ignoring atomicity.

    Both dtypes are converted with ``for_atomic=True`` before comparing.
    """
    lhs = to_loopy_type(d1, for_atomic=True)
    rhs = to_loopy_type(d2, for_atomic=True)
    return lhs == rhs
def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
        dim_names=None, strides=None, order=None, for_atomic=False,
        **kwargs):
    """
    All of the following are optional. Specify either strides or shape.

    :arg name: May contain multiple names separated by
        commas, in which case multiple arguments,
        each with identical properties, are created
        for each name.
    :arg dtype: the :class:`numpy.dtype` of the array.
        If this is *None*, :mod:`loopy` will try to continue without
        knowing the type of this array, where the idea is that precise
        knowledge of the type will become available at invocation time.
        :class:`loopy.CompiledKernel` (and thereby
        :meth:`loopy.LoopKernel.__call__`) automatically add this type
        information based on invocation arguments.

        Note that some transformations, such as :func:`loopy.add_padding`
        cannot be performed without knowledge of the exact *dtype*.

    :arg shape: May be one of the following:

        * *None*. In this case, no shape is intended to be specified,
          only the strides will be used to access the array. Bounds checking
          will not be performed.

        * :class:`loopy.auto`. The shape will be determined by finding the
          access footprint.

        * a tuple like :attr:`numpy.ndarray.shape`.

          Each entry of the tuple is also allowed to be a :mod:`pymbolic`
          expression involving kernel parameters, or a string that can be
          parsed to such an expression.

          Any element of the shape tuple not used to compute strides
          may be *None*.

        * A string which can be parsed into the previous form.

    :arg dim_tags: A comma-separated list of tags as understood by
        :func:`parse_array_dim_tag`.

    :arg strides: May be one of the following:

        * None

        * :class:`loopy.auto`. The strides will be determined by *order*
          and the access footprint.

        * a tuple like :attr:`numpy.ndarray.shape`.

          Each entry of the tuple is also allowed to be a :mod:`pymbolic`
          expression involving kernel parameters, or a string that can be
          parsed to such an expression.

        * A string which can be parsed into the previous form.

    :arg order: "F" or "C" for C (row major) or Fortran
        (column major). Defaults to the *default_order* argument
        passed to :func:`loopy.make_kernel`.
    :arg for_atomic:
        Whether the array is declared for atomic access, and, if necessary,
        using atomic-capable data types.
    :arg offset: Offset from the beginning of the buffer to the point from
        which the strides are counted. May be one of

        * 0
        * a string (that is interpreted as an argument name).
        * :class:`loopy.auto`, in which case an offset argument
          is added automatically, immediately following this argument.
          :class:`loopy.CompiledKernel` is even smarter in its treatment of
          this case and will compile custom versions of the kernel based on
          whether the passed arrays have offsets or not.

    :raises TypeError: for keywords not in ``allowed_extra_kwargs`` or if
        both *strides* and *dim_tags* are given.
    :raises LoopyError: for inconsistent dimension information.
    """

    for kwarg_name in kwargs:
        if kwarg_name not in self.allowed_extra_kwargs:
            raise TypeError("invalid kwarg: %s" % kwarg_name)

    import loopy as lp

    from loopy.types import to_loopy_type
    dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True,
            for_atomic=for_atomic)

    strides_known = strides is not None and strides is not lp.auto
    shape_known = shape is not None and shape is not lp.auto

    if strides_known:
        strides = _parse_shape_or_strides(strides)

    if shape_known:
        shape = _parse_shape_or_strides(shape)

    # {{{ check dim_names

    if dim_names is not None:
        if len(dim_names) != len(set(dim_names)):
            raise LoopyError("dim_names are not unique")

        for n in dim_names:
            if not isinstance(n, str):
                raise LoopyError("found non-string '%s' in dim_names"
                        % type(n).__name__)

    # }}}

    # {{{ convert strides to dim_tags (Note: strides override order)

    if dim_tags is not None and strides_known:
        raise TypeError("may not specify both strides and dim_tags")

    if dim_tags is None and strides_known:
        dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
        strides = None

    # }}}

    if dim_tags is not None:
        dim_tags = parse_array_dim_tags(dim_tags,
                n_axes=(len(shape) if shape_known else None),
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)

    # {{{ determine number of user axes

    num_user_axes = None
    if shape_known:
        num_user_axes = len(shape)
    for dim_iterable in [dim_tags, dim_names]:
        if dim_iterable is not None:
            new_num_user_axes = len(dim_iterable)

            if num_user_axes is None:
                num_user_axes = new_num_user_axes
            else:
                if new_num_user_axes != num_user_axes:
                    raise LoopyError("contradictory values for number of "
                            "dimensions of array '%s' from shape, strides, "
                            "dim_tags, or dim_names"
                            % name)

            del new_num_user_axes

    # }}}

    # {{{ convert order to dim_tags

    if order is None and self.max_target_axes > 1:
        # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
        # if no order is specified. Plus they don't care that much.
        order = "C"

    if dim_tags is None and num_user_axes is not None and order is not None:
        dim_tags = parse_array_dim_tags(num_user_axes*[order],
                n_axes=num_user_axes,
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)
        order = None

    # }}}

    if dim_tags is not None:
        # {{{ find number of target axes

        target_axes = set()
        for dim_tag in dim_tags:
            if isinstance(dim_tag, _StrideArrayDimTagBase):
                target_axes.add(dim_tag.target_axis)

        # NOTE: the messages below use the *name* argument; attributes
        # such as self.name are only assigned by the record constructor
        # at the very end of this method.
        if target_axes != set(range(len(target_axes))):
            raise LoopyError("target axes for variable '%s' are non-"
                    "contiguous" % name)

        num_target_axes = len(target_axes)
        del target_axes

        # }}}

        if not (self.min_target_axes <= num_target_axes
                <= self.max_target_axes):
            raise LoopyError("%s only supports between %d and %d target axes "
                    "('%s' has %d)" % (type(self).__name__,
                        self.min_target_axes, self.max_target_axes,
                        name, num_target_axes))

        new_dim_tags = convert_computed_to_fixed_dim_tags(
                name, num_user_axes, num_target_axes,
                shape, dim_tags)

        if new_dim_tags is not None:
            # successfully normalized
            dim_tags = new_dim_tags
            del new_dim_tags

    if dim_tags is not None:
        # for hashability
        dim_tags = tuple(dim_tags)
        order = None

    if strides is not None:
        # Preserve strides if we weren't able to process them yet.
        # That only happens if they're set to loopy.auto (and 'guessed'
        # in loopy.kernel.creation).

        kwargs["strides"] = strides

    if dim_names is not None and not isinstance(dim_names, tuple):
        from warnings import warn
        warn("dim_names is not a tuple when calling ArrayBase constructor",
                DeprecationWarning, stacklevel=2)

    Record.__init__(self,
            name=name,
            dtype=dtype,
            shape=shape,
            dim_tags=dim_tags,
            offset=offset,
            dim_names=dim_names,
            order=order,
            **kwargs)
def func_filter(key):
    """Select float64 loads that have no local-id strides.

    The checks are made in the order strides, dtype, direction and stop at
    the first one that fails.
    """
    if not (key.lid_strides == {}):
        return False
    if not (key.dtype == to_loopy_type(np.float64)):
        return False
    return key.direction == 'load'
def symbol_x(knl, name):
    """Symbol mangler for the symbol ``X``.

    Returns a ``(dtype, name)`` tuple describing ``X`` as a float32 symbol,
    and *None* for every other *name*. *knl* is not used.
    """
    if name != "X":
        return None

    from loopy.types import to_loopy_type
    return to_loopy_type(np.float32), "X"
def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
        dim_names=None, strides=None, order=None, for_atomic=False,
        **kwargs):
    """
    All of the following are optional. Specify either strides or shape.

    :arg name: May contain multiple names separated by
        commas, in which case multiple arguments,
        each with identical properties, are created
        for each name.
    :arg dtype: the :class:`numpy.dtype` of the array.
        If this is *None*, :mod:`loopy` will try to continue without
        knowing the type of this array, where the idea is that precise
        knowledge of the type will become available at invocation time.
        :class:`loopy.CompiledKernel` (and thereby
        :meth:`loopy.LoopKernel.__call__`) automatically add this type
        information based on invocation arguments.

        Note that some transformations, such as :func:`loopy.add_padding`
        cannot be performed without knowledge of the exact *dtype*.

    :arg shape: May be one of the following:

        * *None*. In this case, no shape is intended to be specified,
          only the strides will be used to access the array. Bounds checking
          will not be performed.

        * :class:`loopy.auto`. The shape will be determined by finding the
          access footprint.

        * a tuple like :attr:`numpy.ndarray.shape`.

          Each entry of the tuple is also allowed to be a :mod:`pymbolic`
          expression involving kernel parameters, or a string that can be
          parsed to such an expression.

          Any element of the shape tuple not used to compute strides
          may be *None*.

        * A string which can be parsed into the previous form.

    :arg dim_tags: A comma-separated list of tags as understood by
        :func:`parse_array_dim_tag`.

    :arg strides: May be one of the following:

        * None

        * :class:`loopy.auto`. The strides will be determined by *order*
          and the access footprint.

        * a tuple like :attr:`numpy.ndarray.shape`.

          Each entry of the tuple is also allowed to be a :mod:`pymbolic`
          expression involving kernel parameters, or a string that can be
          parsed to such an expression.

        * A string which can be parsed into the previous form.

    :arg order: "F" or "C" for C (row major) or Fortran
        (column major). Defaults to the *default_order* argument
        passed to :func:`loopy.make_kernel`.
    :arg for_atomic:
        Whether the array is declared for atomic access, and, if necessary,
        using atomic-capable data types.
    :arg offset: Offset from the beginning of the buffer to the point from
        which the strides are counted. May be one of

        * 0
        * a string (that is interpreted as an argument name).
        * :class:`loopy.auto`, in which case an offset argument
          is added automatically, immediately following this argument.
          :class:`loopy.CompiledKernel` is even smarter in its treatment of
          this case and will compile custom versions of the kernel based on
          whether the passed arrays have offsets or not.

    :raises TypeError: for keywords not in ``allowed_extra_kwargs`` or if
        both *strides* and *dim_tags* are given.
    :raises LoopyError: for inconsistent dimension information.
    """

    for kwarg_name in kwargs:
        if kwarg_name not in self.allowed_extra_kwargs:
            raise TypeError("invalid kwarg: %s" % kwarg_name)

    import loopy as lp

    from loopy.types import to_loopy_type
    dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True,
            for_atomic=for_atomic)

    strides_known = strides is not None and strides is not lp.auto
    shape_known = shape is not None and shape is not lp.auto

    if strides_known:
        strides = _parse_shape_or_strides(strides)

    if shape_known:
        shape = _parse_shape_or_strides(shape)

    # {{{ check dim_names

    if dim_names is not None:
        if len(dim_names) != len(set(dim_names)):
            raise LoopyError("dim_names are not unique")

        for n in dim_names:
            if not isinstance(n, str):
                raise LoopyError("found non-string '%s' in dim_names"
                        % type(n).__name__)

    # }}}

    # {{{ convert strides to dim_tags (Note: strides override order)

    if dim_tags is not None and strides_known:
        raise TypeError("may not specify both strides and dim_tags")

    if dim_tags is None and strides_known:
        dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
        strides = None

    # }}}

    if dim_tags is not None:
        dim_tags = parse_array_dim_tags(dim_tags,
                n_axes=(len(shape) if shape_known else None),
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)

    # {{{ determine number of user axes

    num_user_axes = None
    if shape_known:
        num_user_axes = len(shape)
    for dim_iterable in [dim_tags, dim_names]:
        if dim_iterable is not None:
            new_num_user_axes = len(dim_iterable)

            if num_user_axes is None:
                num_user_axes = new_num_user_axes
            else:
                if new_num_user_axes != num_user_axes:
                    raise LoopyError("contradictory values for number of "
                            "dimensions of array '%s' from shape, strides, "
                            "dim_tags, or dim_names"
                            % name)

            del new_num_user_axes

    # }}}

    # {{{ convert order to dim_tags

    if order is None and self.max_target_axes > 1:
        # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
        # if no order is specified. Plus they don't care that much.
        order = "C"

    if dim_tags is None and num_user_axes is not None and order is not None:
        dim_tags = parse_array_dim_tags(num_user_axes*[order],
                n_axes=num_user_axes,
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)
        order = None

    # }}}

    if dim_tags is not None:
        # {{{ find number of target axes

        target_axes = set()
        for dim_tag in dim_tags:
            if isinstance(dim_tag, _StrideArrayDimTagBase):
                target_axes.add(dim_tag.target_axis)

        # NOTE: the messages below use the *name* argument; attributes
        # such as self.name are only assigned by the record constructor
        # at the very end of this method.
        if target_axes != set(range(len(target_axes))):
            raise LoopyError("target axes for variable '%s' are non-"
                    "contiguous" % name)

        num_target_axes = len(target_axes)
        del target_axes

        # }}}

        if not (self.min_target_axes <= num_target_axes
                <= self.max_target_axes):
            raise LoopyError("%s only supports between %d and %d target axes "
                    "('%s' has %d)" % (type(self).__name__,
                        self.min_target_axes, self.max_target_axes,
                        name, num_target_axes))

        new_dim_tags = convert_computed_to_fixed_dim_tags(
                name, num_user_axes, num_target_axes,
                shape, dim_tags)

        if new_dim_tags is not None:
            # successfully normalized
            dim_tags = new_dim_tags
            del new_dim_tags

    if dim_tags is not None:
        # for hashability
        dim_tags = tuple(dim_tags)
        order = None

    if strides is not None:
        # Preserve strides if we weren't able to process them yet.
        # That only happens if they're set to loopy.auto (and 'guessed'
        # in loopy.kernel.creation).

        kwargs["strides"] = strides

    if dim_names is not None and not isinstance(dim_names, tuple):
        # (a leftover 'pu.db' debugger hook used to sit here; non-tuple
        # dim_names only warrant a deprecation warning)
        from warnings import warn
        warn("dim_names is not a tuple when calling ArrayBase constructor",
                DeprecationWarning, stacklevel=2)

    Record.__init__(self,
            name=name,
            dtype=dtype,
            shape=shape,
            dim_tags=dim_tags,
            offset=offset,
            dim_names=dim_names,
            order=order,
            **kwargs)
def __init__(self, domains, instructions, args=None, schedule=None,
        name="loopy_kernel",
        preambles=None,
        preamble_generators=None,
        assumptions=None,
        local_sizes=None,
        temporary_variables=None,
        iname_to_tags=None,
        substitutions=None,
        function_manglers=None,
        symbol_manglers=None,

        iname_slab_increments=None,
        loop_priority=frozenset(),
        silenced_warnings=None,

        applied_iname_rewrites=None,
        cache_manager=None,
        index_dtype=np.int32,
        options=None,

        state=KernelState.INITIAL,
        target=None,

        overridden_get_grid_sizes_for_insn_ids=None,
        _cached_written_variables=None):
    """
    :arg assumptions: *None* (no assumptions), a string that is parsed as
        a constraint on the outer parameters of *domains*, or a parameter
        set.
    :arg function_manglers: defaults to the default and the single-argument
        function mangler.
    :arg symbol_manglers: defaults to an empty list.
    :arg index_dtype: must convert to a signed integer type.
    :arg overridden_get_grid_sizes_for_insn_ids: A callable. When kernels get
        intersected in slab decomposition, their grid sizes shouldn't
        change. This provides a way to forward sub-kernel grid size requests.

    :raises TypeError: if *index_dtype* is not a signed integer type.
    :raises ValueError: if *state* is not a valid kernel state.
    """

    # {{{ process constructor arguments

    if args is None:
        args = []
    if preambles is None:
        preambles = []
    if preamble_generators is None:
        preamble_generators = []
    if local_sizes is None:
        local_sizes = {}
    if temporary_variables is None:
        temporary_variables = {}
    if iname_to_tags is None:
        iname_to_tags = {}
    if substitutions is None:
        substitutions = {}
    if function_manglers is None:
        function_manglers = [
                default_function_mangler,
                single_arg_function_mangler,
                ]
    if symbol_manglers is None:
        # A fresh list per kernel. This branch used to overwrite
        # *function_manglers* instead, and the default was a single list
        # shared between all kernels.
        symbol_manglers = []
    if iname_slab_increments is None:
        iname_slab_increments = {}

    if silenced_warnings is None:
        silenced_warnings = []
    if applied_iname_rewrites is None:
        applied_iname_rewrites = []

    if cache_manager is None:
        from loopy.kernel.tools import SetOperationCacheManager
        cache_manager = SetOperationCacheManager()

    # }}}

    # {{{ process assumptions

    if assumptions is None:
        # no assumptions: the universe over the parameters of the first
        # domain
        dom0_space = domains[0].get_space()
        assumptions_space = isl.Space.params_alloc(
                dom0_space.get_ctx(), dom0_space.dim(dim_type.param))
        for i in range(dom0_space.dim(dim_type.param)):
            assumptions_space = assumptions_space.set_dim_name(
                    dim_type.param, i,
                    dom0_space.get_dim_name(dim_type.param, i))
        assumptions = isl.BasicSet.universe(assumptions_space)

    elif isinstance(assumptions, str):
        assumptions_set_str = "[%s] -> { : %s}" \
                % (",".join(s for s in self.outer_params(domains)),
                    assumptions)
        assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                assumptions_set_str)

    assert assumptions.is_params()

    # }}}

    from loopy.types import to_loopy_type
    index_dtype = to_loopy_type(index_dtype, target=target)
    if not index_dtype.is_integral():
        raise TypeError("index_dtype must be an integer")
    if np.iinfo(index_dtype.numpy_dtype).min >= 0:
        raise TypeError("index_dtype must be signed")

    if state not in [
            KernelState.INITIAL,
            KernelState.PREPROCESSED,
            KernelState.SCHEDULED,
            ]:
        raise ValueError("invalid value for 'state'")

    from collections import defaultdict
    assert not isinstance(iname_to_tags, defaultdict)

    for iname, tags in six.iteritems(iname_to_tags):
        # don't tolerate empty sets
        assert tags
        assert isinstance(tags, frozenset)

    assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
    assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

    ImmutableRecordWithoutPickling.__init__(self,
            domains=domains,
            instructions=instructions,
            args=args,
            schedule=schedule,
            name=name,
            preambles=preambles,
            preamble_generators=preamble_generators,
            assumptions=assumptions,
            iname_slab_increments=iname_slab_increments,
            loop_priority=loop_priority,
            silenced_warnings=silenced_warnings,
            temporary_variables=temporary_variables,
            local_sizes=local_sizes,
            iname_to_tags=iname_to_tags,
            substitutions=substitutions,
            cache_manager=cache_manager,
            applied_iname_rewrites=applied_iname_rewrites,
            function_manglers=function_manglers,
            symbol_manglers=symbol_manglers,
            index_dtype=index_dtype,
            options=options,
            state=state,
            target=target,
            overridden_get_grid_sizes_for_insn_ids=(
                overridden_get_grid_sizes_for_insn_ids),
            _cached_written_variables=_cached_written_variables)

    self._kernel_executor_cache = {}
def test_working_buffers(self):
    """
    Check that the various working-buffer configurations (including
    local-memory ones) produce the expected pointer unpacks, first with the
    default memory limits and then with constant memory forced to zero.
    """
    oploop = OptionLoopWrapper.from_get_oploop(
        self, do_conp=False, do_vector=True, do_sparse=False)
    for opts in oploop:
        # get the dummy generator
        kgen = self._kernel_gen(opts, include_jac_lookup=True)
        # make kernels
        kgen._make_kernels()
        # process the arguments
        record, _ = kgen._process_args()
        # test that process memory works
        record, _ = kgen._process_memory(record)
        # and generate working buffers
        recordnew, result = kgen._compress_to_working_buffer(record)

        if opts.depth:
            # check for local
            assert next((x for x in recordnew.kernel_data
                         if x.address_space == scopes.LOCAL), None)

        def __check_unpacks(unpacks, offsets, args):
            for arg in args:
                # check that all args are in the unpacks; the name is
                # escaped so it is always matched literally
                pattern = r'\b' + re.escape(arg.name) + r'\b'
                unpack = next(
                    (x for x in unpacks if re.search(pattern, x)), None)
                assert unpack
                # next check the type
                assert kgen.type_map[arg.dtype] in unpack
                # and scope, if needed
                if arg.address_space == scopes.LOCAL:
                    assert 'local' in unpack
                    assert local_work_name in unpack
                    assert 'volatile' in unpack
                else:
                    assert rhs_work_name in unpack
                # and in offset
                assert arg.name in offsets

        def __check_local_unpacks(result, args):
            for arg in args:
                # get offset
                offsets = result.pointer_offsets[arg.name][2]
                new = kgen._get_local_unpacks(result, [arg])
                if not new.pointer_unpacks:
                    assert isinstance(arg, lp.TemporaryVariable)
                else:
                    # and check
                    assert re.search(
                        r'\b' + re.escape(offsets) + r'\b',
                        new.pointer_unpacks[0])

        # check that all args are in the pointer unpacks
        __check_unpacks(
            result.pointer_unpacks, result.pointer_offsets,
            recordnew.args + recordnew.local + recordnew.host_constants)
        # check unpacks for driver function (note: this isn't the 'local'
        # scope rather, local copies out of the working buffer)
        __check_local_unpacks(result, recordnew.args)

        # next, write a dummy input file, such that we can force the constant
        # memory allocation to zero
        with NamedTemporaryFile(suffix='.yaml', mode='w') as temp:
            # Spelled with explicit newlines: 'constant' has to be nested
            # under 'memory-limits' as a block mapping, a single-line
            # "a: b: c" form is not valid YAML.
            temp.write("memory-limits:\n    constant: 0 B\n")
            # seek back to the start, as the original test did, before the
            # file is handed over by name
            temp.seek(0)

            # set file
            kgen.mem_limits = temp.name

            # reprocesses
            noconst, _ = kgen._process_memory(record)
            noconst, result = kgen._compress_to_working_buffer(noconst)

            # check that we have an integer workspace
            int_type = to_loopy_type(arc.kint_type, target=kgen.target)
            assert next(
                (x for x in noconst.kernel_data if x.dtype == int_type),
                None)

            # and recheck pointer unpacks (including host constants)
            __check_unpacks(
                result.pointer_unpacks, result.pointer_offsets,
                recordnew.args + recordnew.local + record.constants)
            __check_local_unpacks(
                result,
                recordnew.args + recordnew.local + record.constants)
def __init__(self, domains, instructions, args=None, schedule=None,
             name="loopy_kernel",
             preambles=None,
             preamble_generators=None,
             assumptions=None,
             local_sizes=None,
             temporary_variables=None,
             iname_to_tag=None,
             substitutions=None,
             function_manglers=None,
             symbol_manglers=None,
             iname_slab_increments=None,
             loop_priority=None,
             silenced_warnings=None,
             applied_iname_rewrites=None,
             cache_manager=None,
             index_dtype=np.int32,
             options=None,
             state=kernel_state.INITIAL,
             target=None,
             get_grid_sizes_for_insn_ids=None):
    """
    Normalize and validate the constructor arguments, make instruction ids
    unique, then store everything as record fields.

    Container-valued arguments default to *None* and are replaced by a
    fresh empty container on every call, so that no list or dict is shared
    between instances; *function_manglers* instead falls back to
    ``[default_function_mangler, single_arg_function_mangler]``.

    :arg assumptions: *None* (a universe parameter set is built from the
        parameters of ``domains[0]``), a string (parsed into a parameter
        set over ``self.outer_params(domains)``), or an object that is used
        as-is. In every case the result must satisfy ``is_params()``.
    :arg index_dtype: converted with :func:`loopy.types.to_loopy_type`;
        must be a signed integer type.
    :arg state: one of ``kernel_state.INITIAL``,
        ``kernel_state.PREPROCESSED`` or ``kernel_state.SCHEDULED``.
    :arg get_grid_sizes_for_insn_ids: When kernels get intersected in slab
        decomposition, their grid sizes shouldn't change. This provides a
        way to forward sub-kernel grid size requests. If given, it is
        assigned as an instance attribute of the same name.

    :raises RuntimeError: if two instructions carry the same explicit id.
    :raises TypeError: if *index_dtype* is not integral or not signed.
    :raises ValueError: if *state* is not one of the values listed above.
    """

    # {{{ replace None defaults by fresh containers

    if args is None:
        args = []
    if preambles is None:
        preambles = []
    if preamble_generators is None:
        preamble_generators = []
    if local_sizes is None:
        local_sizes = {}
    if temporary_variables is None:
        temporary_variables = {}
    if iname_to_tag is None:
        iname_to_tag = {}
    if substitutions is None:
        substitutions = {}
    if function_manglers is None:
        function_manglers = [
            default_function_mangler,
            single_arg_function_mangler,
            ]
    if symbol_manglers is None:
        symbol_manglers = []
    if iname_slab_increments is None:
        iname_slab_increments = {}
    if loop_priority is None:
        loop_priority = []
    if silenced_warnings is None:
        silenced_warnings = []
    if applied_iname_rewrites is None:
        applied_iname_rewrites = []

    if cache_manager is None:
        from loopy.kernel.tools import SetOperationCacheManager
        cache_manager = SetOperationCacheManager()

    # }}}

    # {{{ make instruction ids unique

    from loopy.kernel.creation import UniqueName

    # first pass: collect the explicitly given ids and reject duplicates
    insn_ids = set()
    for insn in instructions:
        if insn.id is not None and not isinstance(insn.id, UniqueName):
            if insn.id in insn_ids:
                raise RuntimeError("duplicate instruction id: %s" % insn.id)
            insn_ids.add(insn.id)

    insn_id_gen = UniqueNameGenerator(insn_ids)

    # second pass: generate ids for instructions that have none or that
    # only carry a UniqueName placeholder
    new_instructions = []
    for insn in instructions:
        if insn.id is None:
            new_instructions.append(
                    insn.copy(id=insn_id_gen("insn")))
        elif isinstance(insn.id, UniqueName):
            new_instructions.append(
                    insn.copy(id=insn_id_gen(insn.id.name)))
        else:
            new_instructions.append(insn)

    instructions = new_instructions

    # }}}

    # {{{ process assumptions

    if assumptions is None:
        dom0_space = domains[0].get_space()
        n_params = dom0_space.dim(dim_type.param)
        assumptions_space = isl.Space.params_alloc(
                dom0_space.get_ctx(), n_params)
        # carry the parameter names of the first domain over
        for i in range(n_params):
            assumptions_space = assumptions_space.set_dim_name(
                    dim_type.param, i,
                    dom0_space.get_dim_name(dim_type.param, i))
        assumptions = isl.BasicSet.universe(assumptions_space)

    elif isinstance(assumptions, str):
        assumptions_set_str = "[%s] -> { : %s}" \
                % (",".join(s for s in self.outer_params(domains)),
                    assumptions)
        assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                assumptions_set_str)

    assert assumptions.is_params()

    # }}}

    from loopy.types import to_loopy_type
    index_dtype = to_loopy_type(index_dtype).with_target(target)
    if not index_dtype.is_integral():
        raise TypeError("index_dtype must be an integer")
    if np.iinfo(index_dtype.numpy_dtype).min >= 0:
        raise TypeError("index_dtype must be signed")

    if get_grid_sizes_for_insn_ids is not None:
        # overwrites method down below
        self.get_grid_sizes_for_insn_ids = get_grid_sizes_for_insn_ids

    if state not in [
            kernel_state.INITIAL,
            kernel_state.PREPROCESSED,
            kernel_state.SCHEDULED,
            ]:
        raise ValueError("invalid value for 'state'")

    assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
    assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

    RecordWithoutPickling.__init__(self,
            domains=domains,
            instructions=instructions,
            args=args,
            schedule=schedule,
            name=name,
            preambles=preambles,
            preamble_generators=preamble_generators,
            assumptions=assumptions,
            iname_slab_increments=iname_slab_increments,
            loop_priority=loop_priority,
            silenced_warnings=silenced_warnings,
            temporary_variables=temporary_variables,
            local_sizes=local_sizes,
            iname_to_tag=iname_to_tag,
            substitutions=substitutions,
            cache_manager=cache_manager,
            applied_iname_rewrites=applied_iname_rewrites,
            function_manglers=function_manglers,
            symbol_manglers=symbol_manglers,
            index_dtype=index_dtype,
            options=options,
            state=state,
            target=target)
def __init__(
        self, domains, instructions, args=None, schedule=None,
        name="loopy_kernel",
        preambles=None,
        preamble_generators=None,
        assumptions=None,
        local_sizes=None,
        temporary_variables=None,
        iname_to_tag=None,
        substitutions=None,
        function_manglers=None,
        symbol_manglers=None,
        iname_slab_increments=None,
        loop_priority=frozenset(),
        silenced_warnings=None,
        applied_iname_rewrites=None,
        cache_manager=None,
        index_dtype=np.int32,
        options=None,
        state=kernel_state.INITIAL,
        target=None,
        get_grid_sizes_for_insn_ids=None):
    """
    Normalize and validate the constructor arguments, make instruction ids
    unique, then store everything as record fields.

    Container-valued arguments default to *None* and are replaced by a
    fresh empty container on every call, so that no list or dict is shared
    between instances; *function_manglers* instead falls back to
    ``[default_function_mangler, single_arg_function_mangler]``.

    :arg assumptions: *None* (a universe parameter set is built from the
        parameters of ``domains[0]``), a string (parsed into a parameter
        set over ``self.outer_params(domains)``), or an object that is used
        as-is. In every case the result must satisfy ``is_params()``.
    :arg index_dtype: converted with :func:`loopy.types.to_loopy_type`;
        must be a signed integer type.
    :arg state: one of ``kernel_state.INITIAL``,
        ``kernel_state.PREPROCESSED`` or ``kernel_state.SCHEDULED``.
    :arg get_grid_sizes_for_insn_ids: When kernels get intersected in slab
        decomposition, their grid sizes shouldn't change. This provides a
        way to forward sub-kernel grid size requests. If given, it is
        assigned as an instance attribute of the same name.

    :raises RuntimeError: if two instructions carry the same explicit id.
    :raises TypeError: if *index_dtype* is not integral or not signed.
    :raises ValueError: if *state* is not one of the values listed above.
    """

    # {{{ replace None defaults by fresh containers

    if args is None:
        args = []
    if preambles is None:
        preambles = []
    if preamble_generators is None:
        preamble_generators = []
    if local_sizes is None:
        local_sizes = {}
    if temporary_variables is None:
        temporary_variables = {}
    if iname_to_tag is None:
        iname_to_tag = {}
    if substitutions is None:
        substitutions = {}
    if function_manglers is None:
        function_manglers = [
            default_function_mangler,
            single_arg_function_mangler,
            ]
    if symbol_manglers is None:
        symbol_manglers = []
    if iname_slab_increments is None:
        iname_slab_increments = {}
    if silenced_warnings is None:
        silenced_warnings = []
    if applied_iname_rewrites is None:
        applied_iname_rewrites = []

    if cache_manager is None:
        from loopy.kernel.tools import SetOperationCacheManager
        cache_manager = SetOperationCacheManager()

    # }}}

    # {{{ make instruction ids unique

    from loopy.kernel.creation import UniqueName

    # first pass: collect the explicitly given ids and reject duplicates
    insn_ids = set()
    for insn in instructions:
        if insn.id is not None and not isinstance(insn.id, UniqueName):
            if insn.id in insn_ids:
                raise RuntimeError("duplicate instruction id: %s" % insn.id)
            insn_ids.add(insn.id)

    insn_id_gen = UniqueNameGenerator(insn_ids)

    # second pass: generate ids for instructions that have none or that
    # only carry a UniqueName placeholder
    new_instructions = []
    for insn in instructions:
        if insn.id is None:
            new_instructions.append(insn.copy(id=insn_id_gen("insn")))
        elif isinstance(insn.id, UniqueName):
            new_instructions.append(
                    insn.copy(id=insn_id_gen(insn.id.name)))
        else:
            new_instructions.append(insn)

    instructions = new_instructions

    # }}}

    # {{{ process assumptions

    if assumptions is None:
        dom0_space = domains[0].get_space()
        n_params = dom0_space.dim(dim_type.param)
        assumptions_space = isl.Space.params_alloc(
                dom0_space.get_ctx(), n_params)
        # carry the parameter names of the first domain over
        for i in range(n_params):
            assumptions_space = assumptions_space.set_dim_name(
                    dim_type.param, i,
                    dom0_space.get_dim_name(dim_type.param, i))
        assumptions = isl.BasicSet.universe(assumptions_space)

    elif isinstance(assumptions, str):
        assumptions_set_str = "[%s] -> { : %s}" \
                % (",".join(s for s in self.outer_params(domains)),
                    assumptions)
        assumptions = isl.BasicSet.read_from_str(domains[0].get_ctx(),
                assumptions_set_str)

    assert assumptions.is_params()

    # }}}

    from loopy.types import to_loopy_type
    index_dtype = to_loopy_type(index_dtype, target=target)
    if not index_dtype.is_integral():
        raise TypeError("index_dtype must be an integer")
    if np.iinfo(index_dtype.numpy_dtype).min >= 0:
        raise TypeError("index_dtype must be signed")

    if get_grid_sizes_for_insn_ids is not None:
        # overwrites method down below
        self.get_grid_sizes_for_insn_ids = get_grid_sizes_for_insn_ids

    if state not in [
            kernel_state.INITIAL,
            kernel_state.PREPROCESSED,
            kernel_state.SCHEDULED,
            ]:
        raise ValueError("invalid value for 'state'")

    assert all(dom.get_ctx() == isl.DEFAULT_CONTEXT for dom in domains)
    assert assumptions.get_ctx() == isl.DEFAULT_CONTEXT

    ImmutableRecordWithoutPickling.__init__(
            self,
            domains=domains,
            instructions=instructions,
            args=args,
            schedule=schedule,
            name=name,
            preambles=preambles,
            preamble_generators=preamble_generators,
            assumptions=assumptions,
            iname_slab_increments=iname_slab_increments,
            loop_priority=loop_priority,
            silenced_warnings=silenced_warnings,
            temporary_variables=temporary_variables,
            local_sizes=local_sizes,
            iname_to_tag=iname_to_tag,
            substitutions=substitutions,
            cache_manager=cache_manager,
            applied_iname_rewrites=applied_iname_rewrites,
            function_manglers=function_manglers,
            symbol_manglers=symbol_manglers,
            index_dtype=index_dtype,
            options=options,
            state=state,
            target=target)

    self._kernel_executor_cache = {}
def __init__(self, name, dtype=None, shape=(), scope=auto,
             dim_tags=None, offset=0, dim_names=None, strides=None,
             order=None, base_indices=None, storage_shape=None,
             base_storage=None, initializer=None, read_only=False,
             **kwargs):
    """
    Validate the temporary-variable specific arguments and hand everything
    over to :meth:`ArrayBase.__init__`.

    :arg dtype: :class:`loopy.auto` or a :class:`numpy.dtype`
    :arg shape: :class:`loopy.auto` or a shape tuple
    :arg base_indices: :class:`loopy.auto` or a tuple of base indices.
        If *None* and *shape* is known, defaults to all zeros, one per
        entry of *shape*; if *shape* is :class:`loopy.auto` it is left as
        *None*.
    :arg strides: forwarded unchanged to :meth:`ArrayBase.__init__`.
    :arg order: defaults to ``"C"`` when *None*.
    :arg initializer: *None* or a :class:`numpy.ndarray`. When an array is
        given, *offset* must be 0, *read_only* must be true, *base_storage*
        must be *None*, and an unspecified *dtype* / *shape* is taken from
        the array.

    :raises LoopyError: if the initializer-related constraints above are
        violated.
    """

    if initializer is None:
        pass
    elif isinstance(initializer, np.ndarray):
        if offset != 0:
            raise LoopyError("temporary variable '%s': "
                    "offset must be 0 if initializer specified"
                    % name)

        from loopy.types import NumpyType, to_loopy_type
        if dtype is auto or dtype is None:
            dtype = NumpyType(initializer.dtype)
        elif to_loopy_type(dtype) != to_loopy_type(initializer.dtype):
            raise LoopyError("temporary variable '%s': "
                    "dtype of initializer does not match "
                    "dtype of array."
                    % name)

        if shape is auto:
            shape = initializer.shape
    else:
        raise LoopyError("temporary variable '%s': "
                "initializer must be None or a numpy array"
                % name)

    if order is None:
        order = "C"

    # 'shape' may still be loopy.auto at this point, in which case it has
    # no length and the base indices cannot be defaulted yet.
    if base_indices is None and shape is not auto:
        base_indices = (0,) * len(shape)

    if not read_only and initializer is not None:
        raise LoopyError("temporary variable '%s': "
                "read-write variables with initializer "
                "are not currently supported "
                "(did you mean to set read_only=True?)"
                % name)

    if base_storage is not None and initializer is not None:
        raise LoopyError("temporary variable '%s': "
                "base_storage and initializer are "
                "mutually exclusive"
                % name)

    # 'strides' is passed on explicitly; it used to be accepted by this
    # constructor but silently dropped.
    ArrayBase.__init__(self, name=intern(name),
            dtype=dtype, shape=shape, strides=strides,
            dim_tags=dim_tags, offset=offset, dim_names=dim_names,
            order=order,
            base_indices=base_indices, scope=scope,
            storage_shape=storage_shape,
            base_storage=base_storage,
            initializer=initializer,
            read_only=read_only,
            **kwargs)
def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0,
             dim_names=None, strides=None, order=None, for_atomic=False,
             target=None, alignment=None, **kwargs):
    """
    All of the following (except *name*) are optional.
    Specify either strides or shape.

    :arg name: When passed to :class:`loopy.make_kernel`, this may contain
        multiple names separated by commas, in which case multiple
        arguments, each with identical properties, are created for each
        name.
    :arg shape: May be any of the things specified under :attr:`shape`,
        or a string which can be parsed into the previous form.

    :arg dim_tags: A comma-separated list of tags as understood by
        :func:`loopy.kernel.array.parse_array_dim_tags`.

    :arg strides: May be one of the following:

        * None

        * :class:`loopy.auto`. The strides will be determined by *order*
          and the access footprint.

        * a tuple like :attr:`numpy.ndarray.shape`.

          Each entry of the tuple is also allowed to be a :mod:`pymbolic`
          expression involving kernel parameters, or a string that can be
          parsed to such an expression.

        * A (potentially comma-separated) string which can be parsed into
          the previous form.

    :arg order: "F" or "C" for C (row major) or Fortran
        (column major). Defaults to the *default_order* argument
        passed to :func:`loopy.make_kernel`.
    :arg for_atomic:
        Whether the array is declared for atomic access, and, if necessary,
        using atomic-capable data types.
    :arg offset: (See :attr:`offset`)
    :arg alignment: memory alignment in bytes

    :raises TypeError: for keyword arguments not listed in
        ``allowed_extra_kwargs``, or if both *strides* and *dim_tags* are
        given.
    :raises LoopyError: for non-unique or non-string *dim_names*,
        contradictory dimension counts, non-contiguous target axes, or an
        unsupported number of target axes.
    """

    for kwarg_name in kwargs:
        if kwarg_name not in self.allowed_extra_kwargs:
            raise TypeError("invalid kwarg: %s" % kwarg_name)

    import loopy as lp

    from loopy.types import to_loopy_type
    dtype = to_loopy_type(dtype, allow_auto=True, allow_none=True,
            for_atomic=for_atomic, target=target)

    if dtype is lp.auto:
        from warnings import warn
        warn(
                "Argument/temporary data type for '%s' should be None if "
                "unspecified, not auto. This usage will be disallowed in 2018."
                % name,
                DeprecationWarning, stacklevel=2)

        dtype = None

    strides_known = strides is not None and strides is not lp.auto
    shape_known = shape is not None and shape is not lp.auto

    if strides_known:
        strides = _parse_shape_or_strides(strides)

    if shape_known:
        shape = _parse_shape_or_strides(shape)

    # {{{ check dim_names

    if dim_names is not None:
        if len(dim_names) != len(set(dim_names)):
            raise LoopyError("dim_names are not unique")

        for n in dim_names:
            if not isinstance(n, str):
                raise LoopyError("found non-string '%s' in dim_names"
                        % type(n).__name__)

    # }}}

    # {{{ convert strides to dim_tags (Note: strides override order)

    if dim_tags is not None and strides_known:
        raise TypeError("may not specify both strides and dim_tags")

    if dim_tags is None and strides_known:
        dim_tags = [FixedStrideArrayDimTag(s) for s in strides]
        strides = None

    # }}}

    if dim_tags is not None:
        dim_tags = parse_array_dim_tags(
                dim_tags,
                n_axes=(len(shape) if shape_known else None),
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)

    # {{{ determine number of user axes

    num_user_axes = None
    if shape_known:
        num_user_axes = len(shape)

    # every source that is available has to agree on the axis count
    for dim_iterable in [dim_tags, dim_names]:
        if dim_iterable is None:
            continue

        new_num_user_axes = len(dim_iterable)

        if num_user_axes is None:
            num_user_axes = new_num_user_axes
        elif new_num_user_axes != num_user_axes:
            raise LoopyError(
                    "contradictory values for number of "
                    "dimensions of array '%s' from shape, strides, "
                    "dim_tags, or dim_names"
                    % name)

    # }}}

    # {{{ convert order to dim_tags

    if order is None and self.max_target_axes > 1:
        # FIXME: Hackety hack. ImageArgs need to generate dim_tags even
        # if no order is specified. Plus they don't care that much.
        order = "C"

    if dim_tags is None and num_user_axes is not None and order is not None:
        dim_tags = parse_array_dim_tags(
                num_user_axes * [order],
                n_axes=num_user_axes,
                use_increasing_target_axes=self.max_target_axes > 1,
                dim_names=dim_names)
        order = None

    # }}}

    if dim_tags is not None:
        # {{{ find number of target axes

        target_axes = set()
        for dim_tag in dim_tags:
            if isinstance(dim_tag, _StrideArrayDimTagBase):
                target_axes.add(dim_tag.target_axis)

        # NOTE: the messages below use the 'name' argument rather than
        # 'self.name', because no record field has been set on 'self'
        # before ImmutableRecord.__init__ runs at the end of this method.
        if target_axes != set(range(len(target_axes))):
            raise LoopyError("target axes for variable '%s' are non-"
                    "contiguous" % name)

        num_target_axes = len(target_axes)

        # }}}

        if not (self.min_target_axes
                <= num_target_axes
                <= self.max_target_axes):
            raise LoopyError(
                    "%s only supports between %d and %d target axes "
                    "('%s' has %d)" % (type(self).__name__,
                        self.min_target_axes, self.max_target_axes,
                        name, num_target_axes))

        new_dim_tags = convert_computed_to_fixed_dim_tags(
                name, num_user_axes, num_target_axes,
                shape, dim_tags)

        if new_dim_tags is not None:
            # successfully normalized
            dim_tags = new_dim_tags

    if dim_tags is not None:
        # for hashability
        dim_tags = tuple(dim_tags)
        order = None

    if strides is not None:
        # Preserve strides if we weren't able to process them yet.
        # That only happens if they're set to loopy.auto (and 'guessed'
        # in loopy.kernel.creation).

        kwargs["strides"] = strides

    if dim_names is not None and not isinstance(dim_names, tuple):
        from warnings import warn
        warn("dim_names is not a tuple when calling ArrayBase constructor",
                DeprecationWarning, stacklevel=2)

    ImmutableRecord.__init__(self,
            name=name,
            dtype=dtype,
            shape=shape,
            dim_tags=dim_tags,
            offset=offset,
            dim_names=dim_names,
            order=order,
            alignment=alignment,
            for_atomic=for_atomic,
            **kwargs)