def map_constant(self, expr):
    """Infer the numpy dtype of the constant *expr*.

    Integers get the first of ``int32``/``int64`` whose range contains them.
    Objects that carry a ``dtype`` attribute keep it, other floating point
    values become ``float32``, and other complex values become ``complex64``
    provided that conversion compares equal to the ``complex128`` one.

    :returns: a :class:`numpy.dtype`
    :raises TypeInferenceFailure: if the integer fits neither integer type,
        if a complex value cannot be stored in single precision, or if the
        kind of constant is not recognized
    """
    if is_integer(expr):
        for candidate in (np.int32, np.int64):
            limits = np.iinfo(candidate)
            if limits.min <= expr <= limits.max:
                return np.dtype(candidate)

        raise TypeInferenceFailure("integer constant '%s' too large" % expr)

    dt = np.asarray(expr).dtype

    if hasattr(expr, "dtype"):
        return expr.dtype

    if isinstance(expr, np.number):
        # Numpy types are sized
        return np.dtype(type(expr))

    if dt.kind == "f":
        # deduce the smaller type by default
        return np.dtype(np.float32)

    if dt.kind != "c":
        raise TypeInferenceFailure("Cannot deduce type of constant '%s'" % expr)

    if np.complex64(expr) == np.complex128(expr):
        # (COMPLEX_GUESS_LOGIC)
        # No precision is lost by 'guessing' single precision, use that.
        # This at least covers simple cases like '1j'.
        return np.dtype(np.complex64)

    # Codegen for complex types depends on exactly correct types.
    # Refuse temptation to guess.
    raise TypeInferenceFailure("Complex constant '%s' needs to "
            "be sized for type inference " % expr)
def map_constant(self, expr):
    """Infer the type of the constant *expr* as a one-element list.

    The result wraps a numpy dtype in :class:`NumpyType`. Integers use the
    first of ``int32``/``int64`` that can hold them, objects with a ``dtype``
    attribute keep it, other floats map to ``float32`` and other complex
    values to ``complex64`` when that conversion compares equal to the
    ``complex128`` one.

    :returns: ``[NumpyType(dtype)]``
    :raises TypeInferenceFailure: if no type can be deduced
    """

    def wrap(numpy_type):
        # Every successful path hands back exactly one candidate type.
        return [NumpyType(np.dtype(numpy_type))]

    if is_integer(expr):
        for int_type in [np.int32, np.int64]:
            bounds = np.iinfo(int_type)
            if bounds.min <= expr <= bounds.max:
                return wrap(int_type)

        raise TypeInferenceFailure("integer constant '%s' too large" % expr)

    dt = np.asarray(expr).dtype

    if hasattr(expr, "dtype"):
        return [NumpyType(expr.dtype)]

    if isinstance(expr, np.number):
        # Numpy types are sized
        return wrap(type(expr))

    kind = dt.kind

    if kind == "f":
        # deduce the smaller type by default
        return wrap(np.float32)

    if kind == "c":
        if np.complex64(expr) == np.complex128(expr):
            # (COMPLEX_GUESS_LOGIC)
            # No precision is lost by 'guessing' single precision, use that.
            # This at least covers simple cases like '1j'.
            return wrap(np.complex64)

        # Codegen for complex types depends on exactly correct types.
        # Refuse temptation to guess.
        raise TypeInferenceFailure("Complex constant '%s' needs to "
                "be sized for type inference " % expr)

    raise TypeInferenceFailure("Cannot deduce type of constant '%s'" % expr)
def map_constant(self, expr, enclosing_prec, type_context):
    """Return the source text for the constant *expr*.

    Complex values become a ``cfloat_new(re, im)`` or ``cdouble_new(re, im)``
    call. Everything else is formatted according to *type_context*:
    ``"f"`` gives a float literal with an ``f`` suffix, ``"d"`` a float
    literal, ``"i"`` an integer literal. With any other context only
    integers are accepted.

    :arg enclosing_prec: not used by this method
    :returns: a string
    :raises RuntimeError: for complex numpy types other than
        ``complex64``/``complex128`` and for non-integer constants without a
        usable *type_context*
    """
    if isinstance(expr, (complex, np.complexfloating)):
        try:
            dtype = expr.dtype
        except AttributeError:
            # (COMPLEX_GUESS_LOGIC)
            # This made it through type 'guessing' above, and it
            # was concluded above (search for COMPLEX_GUESS_LOGIC),
            # that nothing was lost by using single precision.
            cast_type = "cfloat"
        else:
            if dtype == np.complex128:
                cast_type = "cdouble"
            elif dtype == np.complex64:
                cast_type = "cfloat"
            else:
                raise RuntimeError("unsupported complex type in expression "
                        "generation: %s" % type(expr))

        # Go through float(): the repr of a numpy scalar is not guaranteed
        # to be a bare number (NumPy >= 2 prints 'np.float64(1.0)'), which
        # would end up verbatim in the generated code.
        return "%s_new(%s, %s)" % (
                cast_type, repr(float(expr.real)), repr(float(expr.imag)))

    if type_context == "f":
        return repr(float(expr)) + "f"
    elif type_context == "d":
        return repr(float(expr))
    elif type_context == "i":
        return str(int(expr))
    else:
        if is_integer(expr):
            return str(expr)

        raise RuntimeError("don't know how to generate code "
                "for constant '%s'" % expr)
def map_constant(self, expr, enclosing_prec, type_context):
    """Format the constant *expr* as source text.

    Complex constants are emitted as ``<cast>_new(real, imag)`` where
    ``<cast>`` is ``cfloat`` or ``cdouble``. Other constants follow
    *type_context* (``"f"``: float literal plus ``f``, ``"d"``: float
    literal, ``"i"``: integer literal). For any other context the constant
    must be an integer.

    :arg enclosing_prec: not used by this method
    :returns: a string
    :raises RuntimeError: if the complex numpy type is neither
        ``complex64`` nor ``complex128``, or if a non-integer constant has
        no usable *type_context*
    """
    if not isinstance(expr, (complex, np.complexfloating)):
        if type_context == "f":
            return repr(float(expr)) + "f"
        if type_context == "d":
            return repr(float(expr))
        if type_context == "i":
            return str(int(expr))
        if is_integer(expr):
            return str(expr)

        raise RuntimeError("don't know how to generate code "
                "for constant '%s'" % expr)

    try:
        dtype = expr.dtype
    except AttributeError:
        # (COMPLEX_GUESS_LOGIC)
        # This made it through type 'guessing' above, and it
        # was concluded above (search for COMPLEX_GUESS_LOGIC),
        # that nothing was lost by using single precision.
        cast_type = "cfloat"
    else:
        if dtype == np.complex128:
            cast_type = "cdouble"
        elif dtype == np.complex64:
            cast_type = "cfloat"
        else:
            raise RuntimeError("unsupported complex type in expression "
                    "generation: %s" % type(expr))

    # repr() of a numpy scalar may include its type name (NumPy >= 2 gives
    # 'np.float64(1.0)'); convert to a Python float so that only the number
    # itself reaches the generated code.
    real_text = repr(float(expr.real))
    imag_text = repr(float(expr.imag))
    return "%s_new(%s, %s)" % (cast_type, real_text, imag_text)
def map_constant(self, expr, type_context):
    """Turn the constant *expr* into an expression for the generated code.

    Complex values are built as ``real + imag * I`` (``_Complex_I`` is used
    when the kernel already has a variable named ``I``). NaN and infinities
    map to the ``NAN``/``INFINITY`` variables. Numpy scalars are emitted as
    literals that reflect their type; plain finite Python numbers follow
    *type_context*.

    :arg type_context: ``"f"``, ``"d"``, ``"i"`` or ``"b"`` for plain
        numbers; with any other value only integers are accepted
    :raises LoopyError: for numpy scalar types without a known spelling and
        for values that are neither NaN, infinite nor finite
    :raises RuntimeError: for a plain non-integer constant without a usable
        *type_context*
    """
    from loopy.symbolic import Literal

    if isinstance(expr, (complex, np.complexfloating)):
        real = self.rec(expr.real)
        imag = self.rec(expr.imag)
        iota = p.Variable(
                "I" if "I" not in self.kernel.all_variable_names()
                else "_Complex_I")
        return real + imag * iota

    elif np.isnan(expr):
        return p.Variable("NAN")

    elif np.isneginf(expr):
        return -p.Variable("INFINITY")

    elif np.isinf(expr):
        return p.Variable("INFINITY")

    elif isinstance(expr, np.generic):
        # Explicitly typed: Generated code must reflect type exactly.
        #
        # Literal text is built from plain Python numbers: the repr of a
        # numpy scalar may carry its type name (NumPy >= 2 prints
        # 'np.float32(1.0)'), which must not leak into generated code.

        # FIXME: This assumes a 32-bit architecture.
        if isinstance(expr, np.float32):
            return Literal(repr(float(expr)) + "f")

        elif isinstance(expr, np.float64):
            return Literal(repr(float(expr)))

        # Disabled for now, possibly should be a subtarget.
        # elif isinstance(expr, np.float128):
        #     return Literal(repr(expr)+"l")

        elif isinstance(expr, np.integer):
            suffix = ""
            iinfo = np.iinfo(expr)
            if iinfo.min == 0:
                # unsigned type
                suffix += "u"
            if iinfo.max > (2**31 - 1):
                # does not fit a signed 32-bit integer
                suffix += "l"
            return Literal(repr(int(expr)) + suffix)

        elif isinstance(expr, np.bool_):
            return Literal("true") if expr else Literal("false")

        else:
            raise LoopyError("do not know how to generate code for "
                    "constant of numpy type '%s'" % type(expr).__name__)

    elif np.isfinite(expr):
        if type_context == "f":
            # Round to single precision first, then print the plain number.
            return Literal(repr(float(np.float32(expr))) + "f")
        elif type_context == "d":
            return Literal(repr(float(expr)))
        elif type_context in ["i", "b"]:
            return int(expr)
        else:
            if is_integer(expr):
                return int(expr)

            raise RuntimeError("don't know how to generate code "
                    "for constant '%s'" % expr)

    else:
        raise LoopyError("don't know how to generate code "
                "for constant '%s'" % expr)
def check_temp_variable_shapes_are_constant(kernel):
    """Raise if any temporary variable of *kernel* has a non-integer shape.

    :arg kernel: an object whose ``temporary_variables`` attribute is a
        mapping to objects with ``name`` and ``shape`` attributes
    :raises LoopyError: for the first temporary whose shape has an entry
        that is not an integer
    """
    # dict.values() behaves the same for this purpose as six.itervalues()
    # and does not need the compatibility package.
    for tv in kernel.temporary_variables.values():
        if any(not is_integer(s_i) for s_i in tv.shape):
            raise LoopyError(
                    "shape of temporary variable '%s' is not "
                    "constant (but has to be since the size of "
                    "the temporary needs to be known at build time). "
                    "Use loopy.fix_parameters to set variables to "
                    "constant values." % tv.name)
def sep_shape(self):
    """Return the lengths of all axes tagged as separate arrays.

    :returns: a tuple with one entry of ``self.shape`` per axis whose dim
        tag is a :class:`SeparateArrayArrayDimTag`, in axis order
    :raises TypeError: if the length of such an axis is not an integer
    """
    lengths = []

    for axis_len, tag in zip(self.shape, self.dim_tags):
        if not isinstance(tag, SeparateArrayArrayDimTag):
            continue

        if not is_integer(axis_len):
            raise TypeError("array '%s' has non-fixed-size "
                    "separate-array axis" % self.name)

        lengths.append(axis_len)

    return tuple(lengths)
def map_sum(self, expr):
    """Infer the dtype sets of the sum *expr*.

    Children that are integer constants with magnitude below 1024 are set
    aside. Their dtype sets only take part in the combination when every
    dtype of every other child is integral.

    :returns: the result of ``self.combine`` on the selected dtype sets
    """
    regular_sets = []
    small_int_sets = []

    for child in expr.children:
        inferred = self.rec(child)
        is_small_int = is_integer(child) and abs(child) < 1024
        target_list = small_int_sets if is_small_int else regular_sets
        target_list.append(inferred)

    rest_is_integral = all(
            dtype.is_integral()
            for dtype_set in regular_sets
            for dtype in dtype_set)

    if rest_is_integral:
        regular_sets.extend(small_int_sets)

    return self.combine(regular_sets)
def map_sum(self, expr):
    """Infer the dtype of the sum *expr*.

    Children that are integer constants with magnitude below 1024 are set
    aside. Their dtypes only take part in the combination when every other
    child has an integer dtype (``kind == "i"``).

    :returns: the result of ``self.combine`` on the selected dtypes
    """
    dtypes = []
    small_integer_dtypes = []

    for child in expr.children:
        dtype = self.rec(child)
        if is_integer(child) and abs(child) < 1024:
            small_integer_dtypes.append(dtype)
        else:
            dtypes.append(dtype)

    # The builtin all() is used here; importing a replacement from pytools
    # only shadowed the builtin.
    if all(dtype.kind == "i" for dtype in dtypes):
        dtypes.extend(small_integer_dtypes)

    return self.combine(dtypes)
def eval_expr_assert_integer_constant(i, expr):
    """Evaluate *expr* with ``eval_expr`` and insist on an integer result.

    :arg i: the axis number of ``ary`` being indexed, used in error messages
    :returns: the evaluated index
    :raises LoopyError: if evaluation hits an unknown variable or the result
        is not an integer
    """
    from pymbolic.mapper.evaluator import UnknownVariableError

    try:
        value = eval_expr(expr)
    except UnknownVariableError as exc:
        details = (ary.name, i, ary.dim_tags[i], str(exc))
        raise LoopyError("When trying to index the array '%s' along axis "
                "%d (tagged '%s'), the index was not a compile-time "
                "constant (but it has to be in order for code to be "
                "generated). You likely want to unroll the iname(s) '%s'."
                % details)

    if is_integer(value):
        return value

    raise LoopyError("subscript '%s[%s]' has non-constant "
            "index for separate-array axis %d (0-based)" % (
                ary.name, index, i))
def map_constant(self, expr, type_context):
    """Turn the constant *expr* into an expression for ispc code.

    :arg type_context: ``"f"`` gives a float literal, ``"d"`` a float literal
        with a ``d`` suffix, ``"i"`` returns *expr* unchanged; with any other
        value only integers are accepted (and returned unchanged)
    :raises NotImplementedError: for complex constants
    :raises RuntimeError: for a non-integer constant without a usable
        *type_context*
    """
    if isinstance(expr, (complex, np.complexfloating)):
        raise NotImplementedError("complex numbers in ispc")

    if type_context == "f":
        return Literal(repr(float(expr)))

    if type_context == "d":
        # Keepin' the good ideas flowin' since '66.
        return Literal(repr(float(expr)) + "d")

    if type_context == "i":
        return expr

    from loopy.tools import is_integer
    if not is_integer(expr):
        raise RuntimeError("don't know how to generate code "
                "for constant '%s'" % expr)

    return expr
def map_constant(self, expr, enclosing_prec, type_context):
    """Return the ispc source text for the constant *expr*.

    :arg enclosing_prec: not used by this method
    :arg type_context: ``"f"`` gives a float literal, ``"d"`` a float literal
        with a ``d`` suffix, ``"i"`` an integer literal; with any other value
        only integers are accepted
    :returns: a string
    :raises NotImplementedError: for complex constants
    :raises RuntimeError: for a non-integer constant without a usable
        *type_context*
    """
    if isinstance(expr, (complex, np.complexfloating)):
        raise NotImplementedError("complex numbers in ispc")

    if type_context == "f":
        return repr(float(expr))
    elif type_context == "d":
        # Keepin' the good ideas flowin' since '66.
        return repr(float(expr)) + "d"
    elif type_context == "i":
        return str(int(expr))
    else:
        from loopy.tools import is_integer
        if is_integer(expr):
            return str(expr)

        # Message wording fixed ("generate", not "generated").
        raise RuntimeError("don't know how to generate code "
                "for constant '%s'" % expr)
def map_constant(self, expr):
    """Convert integer values not within the range of `self.int_type` to float.

    Non-integer constants are passed on to :class:`IdentityMapper`, and
    integers within ``self.iinfo`` are returned unchanged. Other integers
    are converted with ``self.float_type``; while ``self.warn`` is set, a
    warning is issued if the conversion changes the integer value.
    """
    if not is_integer(expr):
        return IdentityMapper.map_constant(self, expr)

    in_range = self.iinfo.min <= expr <= self.iinfo.max
    if in_range:
        return expr

    converted = self.float_type(expr)

    if self.warn and int(converted) != int(expr):
        from warnings import warn
        warn("Converting '%d' to '%s' loses digits"
                % (expr, self.float_type.__name__))

        # Suppress further warnings.
        self.warn = False

    return converted
def vector_size(self, target):
    """Return the size of the vector type used for the array
    divided by the basic data type.

    Note: For 3-vectors, this will be 4.

    Arrays without dim tags or without a vector-tagged axis yield 1.

    :arg target: provides ``vector_dtype(dtype, length)``
    :raises LoopyError: if the vector axis length is not an integer
    """
    tags = self.dim_tags
    if tags is None:
        return 1

    # Only the first vector-tagged axis is considered.
    vector_axes = (
            axis for axis, tag in enumerate(tags)
            if isinstance(tag, VectorArrayDimTag))
    axis = next(vector_axes, None)
    if axis is None:
        return 1

    length = self.shape[axis]
    if not is_integer(length):
        raise LoopyError("shape of '%s' has non-constant-integer "
                "length for vector axis %d (0-based)" % (self.name, axis))

    vec_dtype = target.vector_dtype(self.dtype, length)
    return int(vec_dtype.itemsize) // int(self.dtype.itemsize)
def vector_size(self, target):
    """Return the size of the vector type used for the array
    divided by the basic data type.

    Note: For 3-vectors, this will be 4.

    The result is 1 when the array has no dim tags or no axis tagged with
    :class:`VectorArrayDimTag`.

    :arg target: provides ``vector_dtype(dtype, length)``
    :raises LoopyError: if the vector axis length is not an integer
    """
    if self.dim_tags is None:
        return 1

    for axis_nr, tag in enumerate(self.dim_tags):
        if not isinstance(tag, VectorArrayDimTag):
            continue

        vec_len = self.shape[axis_nr]
        if not is_integer(vec_len):
            raise LoopyError("shape of '%s' has non-constant-integer "
                    "length for vector axis %d (0-based)" % (
                        self.name, axis_nr))

        scalar_bytes = int(self.dtype.itemsize)
        vector_bytes = int(target.vector_dtype(self.dtype, vec_len).itemsize)
        return vector_bytes // scalar_bytes

    return 1
def get_access_info(target, ary, index, eval_expr, vectorization_info):
    """Translate the subscript *index* into *ary* to an :class:`AccessInfo`.

    :arg ary: an object of type :class:`ArrayBase`
    :arg index: a tuple of indices representing a subscript into ary
    :arg eval_expr: a callable used to evaluate indices that must be
        integer constants
    :arg vectorization_info: an instance of
        :class:`loopy.codegen.VectorizationInfo`, or *None*.
    :raises LoopyError: for a wrong number of indices, non-constant indices
        where constants are required, strides not divisible by the vector
        size, and unsupported dim tags
    :raises NotImplementedError: if an offset is combined with more than one
        target axis
    """
    import loopy as lp
    from pymbolic import var
    from pymbolic.primitives import Variable

    def eval_expr_assert_integer_constant(i, expr):
        from pymbolic.mapper.evaluator import UnknownVariableError
        try:
            value = eval_expr(expr)
        except UnknownVariableError as exc:
            raise LoopyError(
                    "When trying to index the array '%s' along axis "
                    "%d (tagged '%s'), the index was not a compile-time "
                    "constant (but it has to be in order for code to be "
                    "generated). You likely want to unroll the iname(s) '%s'."
                    % (ary.name, i, ary.dim_tags[i], str(exc)))

        if not is_integer(value):
            raise LoopyError("subscript '%s[%s]' has non-constant "
                    "index for separate-array axis %d (0-based)"
                    % (ary.name, index, i))

        return value

    def apply_offset(sub):
        offset = ary.offset
        if not offset:
            return sub

        if offset is lp.auto:
            # array_name is read at call time, i.e. after any separate-array
            # suffixes have been appended.
            return var(array_name + "_offset") + sub

        if isinstance(offset, str):
            return var(offset) + sub

        # assume it's an expression
        return offset + sub

    if not isinstance(index, tuple):
        index = (index,)

    array_name = ary.name

    if ary.dim_tags is None:
        if len(index) != 1:
            raise LoopyError(
                    "Array '%s' has no known axis implementation "
                    "tags and therefore only supports one-dimensional "
                    "indexing. (Did you mean 'shape=loopy.auto' instead of "
                    "'shape=None'?)" % ary.name)

        return AccessInfo(
                array_name=array_name,
                subscripts=(apply_offset(index[0]),),
                vector_index=None)

    if len(ary.dim_tags) != len(index):
        raise LoopyError("subscript to '%s[%s]' has the wrong "
                "number of indices (got: %d, expected: %d)"
                % (ary.name, index, len(index), len(ary.dim_tags)))

    num_target_axes = ary.num_target_axes()

    vector_index = None
    subscripts = [0] * num_target_axes

    vector_size = ary.vector_size(target)

    # {{{ process separate-array dim tags first, to find array name

    for axis, (axis_index, tag) in enumerate(zip(index, ary.dim_tags)):
        if isinstance(tag, SeparateArrayArrayDimTag):
            array_name += "_s%d" % eval_expr_assert_integer_constant(
                    axis, axis_index)

    # }}}

    # {{{ process remaining dim tags

    for axis, (axis_index, tag) in enumerate(zip(index, ary.dim_tags)):
        if isinstance(tag, SeparateArrayArrayDimTag):
            # handled in the first pass
            continue

        if isinstance(tag, FixedStrideArrayDimTag):
            stride = tag.stride

            if is_integer(stride):
                if tag.stride % vector_size != 0:
                    raise LoopyError(
                            "array '%s' has axis %d stride of "
                            "%d, which is not divisible by the size of the "
                            "vector (%d)"
                            % (ary.name, axis, tag.stride, vector_size))

            elif stride is lp.auto:
                stride = var(array_name + "_stride%d" % axis)

            subscripts[tag.target_axis] += (stride // vector_size) * axis_index

        elif isinstance(tag, VectorArrayDimTag):
            is_vectorized_access = (
                    vectorization_info is not None
                    and isinstance(index[axis], Variable)
                    and index[axis].name == vectorization_info.iname)

            # For a vectorized access nothing is done here, which will
            # result in the vector being returned.
            if not is_vectorized_access:
                constant_index = eval_expr_assert_integer_constant(
                        axis, axis_index)
                assert vector_index is None
                vector_index = constant_index

        else:
            raise LoopyError("unsupported array dim implementation tag '%s' "
                    "in array '%s'" % (tag, ary.name))

    # }}}

    if ary.offset:
        if num_target_axes > 1:
            raise NotImplementedError("offsets for multiple image axes")

        subscripts[0] = apply_offset(subscripts[0])

    return AccessInfo(
            array_name=array_name,
            vector_index=vector_index,
            subscripts=subscripts)
def get_access_info(target, ary, index, eval_expr, vectorization_info):
    """Translate the subscript *index* into *ary* to an :class:`AccessInfo`.

    The array offset may be ``loopy.auto`` (an ``<array>_offset`` variable
    is used), a string naming a variable, or an expression that is added
    as-is. It is applied to the first subscript.

    :arg ary: an object of type :class:`ArrayBase`
    :arg index: a tuple of indices representing a subscript into ary
    :arg eval_expr: a callable used to evaluate indices that must be
        integer constants
    :arg vectorization_info: an instance of
        :class:`loopy.codegen.VectorizationInfo`, or *None*.
    :raises LoopyError: for a wrong number of indices, non-constant indices
        where constants are required, strides not divisible by the vector
        size, and unsupported dim tags
    :raises NotImplementedError: if an offset is combined with more than one
        target axis
    """
    import loopy as lp
    from pymbolic import var

    def eval_expr_assert_integer_constant(i, expr):
        from pymbolic.mapper.evaluator import UnknownVariableError
        try:
            result = eval_expr(expr)
        except UnknownVariableError as e:
            raise LoopyError("When trying to index the array '%s' along axis "
                    "%d (tagged '%s'), the index was not a compile-time "
                    "constant (but it has to be in order for code to be "
                    "generated). You likely want to unroll the iname(s) '%s'."
                    % (ary.name, i, ary.dim_tags[i], str(e)))

        if not is_integer(result):
            raise LoopyError("subscript '%s[%s]' has non-constant "
                    "index for separate-array axis %d (0-based)" % (
                        ary.name, index, i))
        return result

    def apply_offset(sub):
        # Wrapping every offset in var() only works for names; an offset
        # given as an expression has to be added directly.
        if not ary.offset:
            return sub
        if ary.offset is lp.auto:
            return var(array_name + "_offset") + sub
        if isinstance(ary.offset, str):
            return var(ary.offset) + sub
        # assume it's an expression
        return ary.offset + sub

    if not isinstance(index, tuple):
        index = (index,)

    array_name = ary.name

    if ary.dim_tags is None:
        if len(index) != 1:
            raise LoopyError("Array '%s' has no known axis implementation "
                    "tags and therefore only supports one-dimensional "
                    "indexing. (Did you mean 'shape=loopy.auto' instead of "
                    "'shape=None'?)"
                    % ary.name)

        # The offset applies here as well; returning the bare index would
        # silently ignore it.
        return AccessInfo(
                array_name=array_name,
                subscripts=(apply_offset(index[0]),),
                vector_index=None)

    if len(ary.dim_tags) != len(index):
        raise LoopyError("subscript to '%s[%s]' has the wrong "
                "number of indices (got: %d, expected: %d)" % (
                    ary.name, index, len(index), len(ary.dim_tags)))

    num_target_axes = ary.num_target_axes()

    vector_index = None
    subscripts = [0] * num_target_axes

    vector_size = ary.vector_size(target)

    # {{{ process separate-array dim tags first, to find array name

    for i, (idx, dim_tag) in enumerate(zip(index, ary.dim_tags)):
        if isinstance(dim_tag, SeparateArrayArrayDimTag):
            idx = eval_expr_assert_integer_constant(i, idx)
            array_name += "_s%d" % idx

    # }}}

    # {{{ process remaining dim tags

    for i, (idx, dim_tag) in enumerate(zip(index, ary.dim_tags)):
        if isinstance(dim_tag, FixedStrideArrayDimTag):
            stride = dim_tag.stride

            if is_integer(stride):
                if not dim_tag.stride % vector_size == 0:
                    raise LoopyError("array '%s' has axis %d stride of "
                            "%d, which is not divisible by the size of the "
                            "vector (%d)"
                            % (ary.name, i, dim_tag.stride, vector_size))

            elif stride is lp.auto:
                stride = var(array_name + "_stride%d" % i)

            subscripts[dim_tag.target_axis] += (stride // vector_size)*idx

        elif isinstance(dim_tag, SeparateArrayArrayDimTag):
            pass

        elif isinstance(dim_tag, VectorArrayDimTag):
            from pymbolic.primitives import Variable

            if (vectorization_info is not None
                    and isinstance(index[i], Variable)
                    and index[i].name == vectorization_info.iname):
                # We'll do absolutely nothing here, which will result
                # in the vector being returned.
                pass

            else:
                idx = eval_expr_assert_integer_constant(i, idx)

                assert vector_index is None
                vector_index = idx

        else:
            raise LoopyError("unsupported array dim implementation tag '%s' "
                    "in array '%s'" % (dim_tag, ary.name))

    # }}}

    if ary.offset:
        if num_target_axes > 1:
            raise NotImplementedError("offsets for multiple image axes")

        subscripts[0] = apply_offset(subscripts[0])

    return AccessInfo(
            array_name=array_name,
            vector_index=vector_index,
            subscripts=subscripts)
def gen_decls(name_suffix, shape, strides, unvec_shape, unvec_strides,
        stride_arg_axes, dtype, user_index):
    """Yield :class:`ImplementedDataInfo` records, recursing over user axes.

    One user axis is consumed per recursion level. Once all axes are
    consumed, the array itself is yielded, followed by its offset argument
    (only for ``loopy.auto`` offsets) and its stride arguments.

    :arg unvec_shape: shape tuple that accounts for
        :class:`loopy.kernel.array.VectorArrayDimTag` in a scalar
        manner
    :arg unvec_strides: strides tuple that accounts for
        :class:`loopy.kernel.array.VectorArrayDimTag` in a scalar
        manner
    :arg stride_arg_axes: a tuple *(user_axis, impl_axis, unvec_impl_axis)*
    :arg user_index: A tuple representing a (user-facing)
        multi-dimensional subscript. This is filled in with
        concrete integers when known (such as for separate-array
        dim tags), and with *None* where the index won't be
        known until run time.
    """
    import loopy as lp
    from pymbolic import var

    if dtype is None:
        dtype = self.dtype

    user_axis = len(user_index)
    num_user_axes = self.num_user_axes(require_answer=False)

    all_axes_consumed = num_user_axes is None or user_axis >= num_user_axes

    if all_axes_consumed:
        # {{{ recursion base case

        full_name = self.name + name_suffix

        impl_strides = list(strides)
        impl_unvec_strides = list(unvec_strides)

        # generate stride arguments, yielded later to keep array first
        stride_args = []
        for s_user_axis, s_impl_axis, s_unvec_impl_axis in stride_arg_axes:
            stride_name = full_name + "_stride%d" % s_user_axis

            # the same variable stands in for both stride entries
            stride_var = var(stride_name)
            impl_strides[s_impl_axis] = stride_var
            impl_unvec_strides[s_unvec_impl_axis] = stride_var

            stride_args.append(
                    ImplementedDataInfo(
                        target=target,
                        name=stride_name,
                        dtype=index_dtype,
                        arg_class=ValueArg,
                        stride_for_name_and_axis=(full_name, s_impl_axis),
                        is_written=False))

        yield ImplementedDataInfo(
                target=target,
                name=full_name,
                base_name=self.name,
                arg_class=type(self),
                dtype=dtype,
                shape=shape,
                strides=tuple(impl_strides),
                unvec_shape=unvec_shape,
                unvec_strides=tuple(impl_unvec_strides),
                allows_offset=bool(self.offset),
                is_written=is_written)

        if self.offset is lp.auto:
            yield ImplementedDataInfo(
                    target=target,
                    name=full_name + "_offset",
                    dtype=index_dtype,
                    arg_class=ValueArg,
                    offset_for_name=full_name,
                    is_written=False)

        yield from stride_args

        # }}}

        return

    dim_tag = self.dim_tags[user_axis]

    if isinstance(dim_tag, FixedStrideArrayDimTag):
        new_shape_axis = (
                None if array_shape is None else array_shape[user_axis])

        if dim_tag.stride is lp.auto:
            new_stride_arg_axes = stride_arg_axes \
                    + ((user_axis, len(strides), len(unvec_strides)),)

            # repaired above when final array name is known
            # (and stride argument is created)
            new_stride_axis = None
        else:
            new_stride_arg_axes = stride_arg_axes
            new_stride_axis = dim_tag.stride

        yield from gen_decls(
                name_suffix,
                shape + (new_shape_axis,),
                strides + (new_stride_axis,),
                unvec_shape + (new_shape_axis,),
                unvec_strides + (new_stride_axis,),
                new_stride_arg_axes,
                dtype,
                user_index + (None,))

    elif isinstance(dim_tag, SeparateArrayArrayDimTag):
        num_arrays = array_shape[user_axis]
        if not is_integer(num_arrays):
            raise LoopyError("shape of '%s' has non-constant "
                    "integer axis %d (0-based)" % (self.name, user_axis))

        # one full set of declarations per separate array
        for array_nr in range(num_arrays):
            yield from gen_decls(
                    name_suffix + "_s%d" % array_nr,
                    shape, strides, unvec_shape, unvec_strides,
                    stride_arg_axes, dtype,
                    user_index + (array_nr,))

    elif isinstance(dim_tag, VectorArrayDimTag):
        vec_len = array_shape[user_axis]
        if not is_integer(vec_len):
            raise LoopyError("shape of '%s' has non-constant "
                    "integer axis %d (0-based)" % (self.name, user_axis))

        yield from gen_decls(
                name_suffix,
                shape,
                strides,
                unvec_shape + (vec_len,),
                # vectors always have stride 1
                unvec_strides + (1,),
                stride_arg_axes,
                target.vector_dtype(dtype, vec_len),
                user_index + (None,))

    else:
        raise LoopyError(
                "unsupported array dim implementation tag '%s' "
                "in array '%s'" % (dim_tag, self.name))
def gen_decls(name_suffix, shape, strides, unvec_shape, unvec_strides,
        stride_arg_axes, dtype, user_index):
    """Yield :class:`ImplementedDataInfo` records, recursing over user axes.

    One user axis is consumed per recursion level. Once all axes are
    consumed, the array itself is yielded, followed by its offset argument
    (only for ``loopy.auto`` offsets) and its stride arguments.

    :arg unvec_shape: shape tuple that accounts for
        :class:`loopy.kernel.array.VectorArrayDimTag` in a scalar
        manner
    :arg unvec_strides: strides tuple that accounts for
        :class:`loopy.kernel.array.VectorArrayDimTag` in a scalar
        manner
    :arg stride_arg_axes: a tuple *(user_axis, impl_axis, unvec_impl_axis)*
    :arg user_index: A tuple representing a (user-facing)
        multi-dimensional subscript. This is filled in with
        concrete integers when known (such as for separate-array
        dim tags), and with *None* where the index won't be
        known until run time.
    """
    import loopy as lp

    if dtype is None:
        dtype = self.dtype

    user_axis = len(user_index)

    num_user_axes = self.num_user_axes(require_answer=False)

    if num_user_axes is None or user_axis >= num_user_axes:
        # {{{ recursion base case

        full_name = self.name + name_suffix

        stride_args = []
        strides = list(strides)
        unvec_strides = list(unvec_strides)

        # generate stride arguments, yielded later to keep array first
        for stride_user_axis, stride_impl_axis, stride_unvec_impl_axis \
                in stride_arg_axes:
            stride_name = full_name+"_stride%d" % stride_user_axis

            from pymbolic import var
            strides[stride_impl_axis] = \
                    unvec_strides[stride_unvec_impl_axis] = \
                    var(stride_name)

            stride_args.append(
                    ImplementedDataInfo(
                        target=target,
                        name=stride_name,
                        dtype=index_dtype,
                        arg_class=ValueArg,
                        stride_for_name_and_axis=(
                            full_name, stride_impl_axis),
                        is_written=False))

        yield ImplementedDataInfo(
                target=target,
                name=full_name,
                base_name=self.name,
                arg_class=type(self),
                dtype=dtype,
                shape=shape,
                strides=tuple(strides),
                unvec_shape=unvec_shape,
                unvec_strides=tuple(unvec_strides),
                allows_offset=bool(self.offset),
                is_written=is_written)

        # Only an automatic offset needs a generated '<name>_offset'
        # argument. Any other truthy offset used to get one declared too,
        # although nothing else here would give it a value.
        if self.offset is lp.auto:
            offset_name = full_name+"_offset"
            yield ImplementedDataInfo(
                    target=target,
                    name=offset_name,
                    dtype=index_dtype,
                    arg_class=ValueArg,
                    offset_for_name=full_name,
                    is_written=False)

        for sa in stride_args:
            yield sa

        # }}}

        return

    dim_tag = self.dim_tags[user_axis]

    if isinstance(dim_tag, FixedStrideArrayDimTag):
        if array_shape is None:
            new_shape_axis = None
        else:
            new_shape_axis = array_shape[user_axis]

        if dim_tag.stride is lp.auto:
            new_stride_arg_axes = stride_arg_axes \
                    + ((user_axis, len(strides), len(unvec_strides)),)

            # repaired above when final array name is known
            # (and stride argument is created)
            new_stride_axis = None
        else:
            new_stride_arg_axes = stride_arg_axes
            new_stride_axis = dim_tag.stride

        for res in gen_decls(name_suffix,
                shape + (new_shape_axis,), strides + (new_stride_axis,),
                unvec_shape + (new_shape_axis,),
                unvec_strides + (new_stride_axis,),
                new_stride_arg_axes,
                dtype, user_index + (None,)):
            yield res

    elif isinstance(dim_tag, SeparateArrayArrayDimTag):
        shape_i = array_shape[user_axis]
        if not is_integer(shape_i):
            raise LoopyError("shape of '%s' has non-constant "
                    "integer axis %d (0-based)" % (
                        self.name, user_axis))

        # one full set of declarations per separate array
        for i in range(shape_i):
            for res in gen_decls(name_suffix + "_s%d" % i,
                    shape, strides, unvec_shape, unvec_strides,
                    stride_arg_axes, dtype,
                    user_index + (i,)):
                yield res

    elif isinstance(dim_tag, VectorArrayDimTag):
        shape_i = array_shape[user_axis]
        if not is_integer(shape_i):
            raise LoopyError("shape of '%s' has non-constant "
                    "integer axis %d (0-based)" % (
                        self.name, user_axis))

        for res in gen_decls(name_suffix,
                shape, strides,
                unvec_shape + (shape_i,),
                # vectors always have stride 1
                unvec_strides + (1,),
                stride_arg_axes,
                target.vector_dtype(dtype, shape_i),
                user_index + (None,)):
            yield res

    else:
        raise LoopyError("unsupported array dim implementation tag '%s' "
                "in array '%s'" % (dim_tag, self.name))
def map_constant(self, expr, type_context):
    """Turn the constant *expr* into an expression for the generated code.

    Complex values become a call to ``cfloat_new``/``cdouble_new`` with the
    real and imaginary parts. Numpy scalars are emitted as literals that
    reflect their type; plain Python numbers follow *type_context*.

    :arg type_context: ``"f"``, ``"d"`` or ``"i"`` for plain numbers; with
        any other value only integers are accepted
    :raises RuntimeError: for complex numpy types other than
        ``complex64``/``complex128`` and for a plain non-integer constant
        without a usable *type_context*
    :raises LoopyError: for numpy scalar types without a known spelling
    """
    from loopy.symbolic import Literal

    if isinstance(expr, (complex, np.complexfloating)):
        try:
            dtype = expr.dtype
        except AttributeError:
            # (COMPLEX_GUESS_LOGIC) This made it through type 'guessing' in
            # type inference, and it was concluded there (search for
            # COMPLEX_GUESS_LOGIC in loopy.type_inference), that no
            # accuracy was lost by using single precision.
            cast_type = "cfloat"
        else:
            if dtype == np.complex128:
                cast_type = "cdouble"
            elif dtype == np.complex64:
                cast_type = "cfloat"
            else:
                raise RuntimeError(
                        "unsupported complex type in expression "
                        "generation: %s" % type(expr))

        return var("%s_new" % cast_type)(expr.real, expr.imag)

    elif isinstance(expr, np.generic):
        # Explicitly typed: Generated code must reflect type exactly.
        #
        # Literal text is built from plain Python numbers: the repr of a
        # numpy scalar may carry its type name (NumPy >= 2 prints
        # 'np.float32(1.0)'), which must not leak into generated code.

        # FIXME: This assumes a 32-bit architecture.
        if isinstance(expr, np.float32):
            return Literal(repr(float(expr)) + "f")

        elif isinstance(expr, np.float64):
            return Literal(repr(float(expr)))

        # Disabled for now, possibly should be a subtarget.
        # elif isinstance(expr, np.float128):
        #     return Literal(repr(expr)+"l")

        elif isinstance(expr, np.integer):
            suffix = ""
            iinfo = np.iinfo(expr)
            if iinfo.min == 0:
                # unsigned type
                suffix += "u"
            if iinfo.max > (2**31 - 1):
                # does not fit a signed 32-bit integer
                suffix += "l"
            return Literal(repr(int(expr)) + suffix)

        else:
            raise LoopyError("do not know how to generate code for "
                    "constant of numpy type '%s'" % type(expr).__name__)

    else:
        if type_context == "f":
            # Round to single precision first, then print the plain number.
            return Literal(repr(float(np.float32(expr))) + "f")
        elif type_context == "d":
            return Literal(repr(float(expr)))
        elif type_context == "i":
            return int(expr)
        else:
            if is_integer(expr):
                return int(expr)

            raise RuntimeError("don't know how to generate code "
                    "for constant '%s'" % expr)
def convert_computed_to_fixed_dim_tags(name, num_user_axes, num_target_axes,
        shape, dim_tags):
    """Replace computed-stride dim tags by fixed-stride ones.

    For each target axis, the stride tags are visited in order of their
    layout nesting level while a running stride is accumulated from the
    axis lengths in *shape*.

    :arg name: the array name, used in error messages
    :arg num_user_axes: not used by this function
    :returns: a copy of *dim_tags* with every
        :class:`ComputedStrideArrayDimTag` replaced by a
        :class:`FixedStrideArrayDimTag`, or *None* if *shape* is needed but
        is *None* or ``loopy.auto``
    :raises LoopyError: for more than one vector axis, unknown dim tags, or
        a computed stride nested outside of an axis of unknown stride
    :raises TypeError: if the vector axis length is not an integer
    """
    # Just to clarify:
    #
    # - user axes are user-facing--what the user actually uses for indexing.
    #
    # - target axes are implementation facing. Normal in-memory arrays have one.
    #   3D images have three.

    import loopy as lp

    # {{{ pick apart arg dim tags into computed, fixed and vec

    vector_dim = None

    # a mapping from target axes to {layout_nesting_level: dim_tag_index}
    target_axis_to_nesting_level_map = {}

    for i, dim_tag in enumerate(dim_tags):
        if isinstance(dim_tag, VectorArrayDimTag):
            if vector_dim is not None:
                raise LoopyError("arg '%s' may only have one vector-tagged "
                        "argument dimension" % name)

            vector_dim = i

        elif isinstance(dim_tag, _StrideArrayDimTagBase):
            if dim_tag.layout_nesting_level is None:
                continue

            nl_map = target_axis_to_nesting_level_map \
                    .setdefault(dim_tag.target_axis, {})
            assert dim_tag.layout_nesting_level not in nl_map
            nl_map[dim_tag.layout_nesting_level] = i

        elif isinstance(dim_tag, SeparateArrayArrayDimTag):
            pass

        else:
            raise LoopyError("invalid array dim tag")

    # }}}

    # {{{ convert computed to fixed stride dim tags

    new_dim_tags = dim_tags[:]

    for target_axis in range(num_target_axes):
        if vector_dim is None:
            stride_so_far = 1
        else:
            if shape is None or shape is lp.auto:
                # unable to normalize without known shape
                return None

            if not is_integer(shape[vector_dim]):
                raise TypeError(
                        "shape along vector axis %d of array '%s' "
                        "must be an integer, not an expression ('%s')"
                        % (vector_dim, name, shape[vector_dim]))

            stride_so_far = shape[vector_dim]
            # FIXME: OpenCL-specific
            if stride_so_far == 3:
                stride_so_far = 4

        nesting_level_map = target_axis_to_nesting_level_map.get(
                target_axis, {})
        nl_keys = sorted(nesting_level_map.keys())

        if not nl_keys:
            continue

        for key in nl_keys:
            dim_tag_index = nesting_level_map[key]
            dim_tag = dim_tags[dim_tag_index]

            if isinstance(dim_tag, ComputedStrideArrayDimTag):
                if stride_so_far is None:
                    raise LoopyError(
                            "unable to determine fixed stride "
                            "for axis %d because it is nested outside of "
                            "an 'auto' stride axis" % dim_tag_index)

                new_dim_tags[dim_tag_index] = FixedStrideArrayDimTag(
                        stride_so_far,
                        target_axis=dim_tag.target_axis,
                        layout_nesting_level=dim_tag.layout_nesting_level)

                if shape is None or shape is lp.auto:
                    # unable to normalize without known shape
                    return None

                shape_axis = shape[dim_tag_index]
                if shape_axis is None:
                    stride_so_far = None
                else:
                    stride_so_far *= shape_axis

                # Round the running stride up to the next multiple of
                # pad_to: ceil(stride / pad_to) * pad_to. (Multiplying by
                # the stride itself does not give a multiple of pad_to.)
                # An unknown stride cannot be padded and stays unknown.
                if dim_tag.pad_to is not None and stride_so_far is not None:
                    from pytools import div_ceil
                    stride_so_far = (
                            div_ceil(stride_so_far, dim_tag.pad_to)
                            * dim_tag.pad_to)

            elif isinstance(dim_tag, FixedStrideArrayDimTag):
                stride_so_far = dim_tag.stride

                if stride_so_far is lp.auto:
                    stride_so_far = None

            else:
                raise TypeError("internal error in dim_tag conversion")

    # }}}

    return new_dim_tags
def convert_computed_to_fixed_dim_tags(name, num_user_axes, num_target_axes,
        shape, dim_tags):
    """Replace computed-stride dim tags by fixed-stride ones.

    Stride tags are grouped by target axis and processed by increasing
    layout nesting level, accumulating a running stride from the axis
    lengths in *shape*.

    :arg name: the array name, used in error messages
    :arg num_user_axes: not used by this function
    :returns: a copy of *dim_tags* in which each
        :class:`ComputedStrideArrayDimTag` has been replaced by a
        :class:`FixedStrideArrayDimTag`, or *None* if *shape* is needed but
        is *None* or ``loopy.auto``
    :raises LoopyError: for more than one vector axis, unknown dim tags, or
        a computed stride nested outside of an axis of unknown stride
    :raises TypeError: if the vector axis length is not an integer
    """
    # Just to clarify:
    #
    # - user axes are user-facing--what the user actually uses for indexing.
    #
    # - target axes are implementation facing. Normal in-memory arrays have one.
    #   3D images have three.

    import loopy as lp

    shape_is_unknown = shape is None or shape is lp.auto

    # {{{ pick apart arg dim tags into computed, fixed and vec

    vector_dim = None

    # a mapping from target axes to {layout_nesting_level: dim_tag_index}
    target_axis_to_nesting_level_map = {}

    for i, dim_tag in enumerate(dim_tags):
        if isinstance(dim_tag, VectorArrayDimTag):
            if vector_dim is not None:
                raise LoopyError("arg '%s' may only have one vector-tagged "
                        "argument dimension" % name)

            vector_dim = i

        elif isinstance(dim_tag, _StrideArrayDimTagBase):
            if dim_tag.layout_nesting_level is None:
                continue

            nl_map = target_axis_to_nesting_level_map \
                    .setdefault(dim_tag.target_axis, {})
            assert dim_tag.layout_nesting_level not in nl_map
            nl_map[dim_tag.layout_nesting_level] = i

        elif isinstance(dim_tag, SeparateArrayArrayDimTag):
            pass

        else:
            raise LoopyError("invalid array dim tag")

    # }}}

    # {{{ convert computed to fixed stride dim tags

    new_dim_tags = dim_tags[:]

    for target_axis in range(num_target_axes):
        if vector_dim is None:
            stride_so_far = 1
        else:
            if shape_is_unknown:
                # unable to normalize without known shape
                return None

            if not is_integer(shape[vector_dim]):
                raise TypeError("shape along vector axis %d of array '%s' "
                        "must be an integer, not an expression ('%s')"
                        % (vector_dim, name, shape[vector_dim]))

            stride_so_far = shape[vector_dim]
            # FIXME: OpenCL-specific
            if stride_so_far == 3:
                stride_so_far = 4

        nesting_level_map = target_axis_to_nesting_level_map.get(target_axis, {})

        # An axis without stride tags simply contributes no iterations.
        for key in sorted(nesting_level_map):
            dim_tag_index = nesting_level_map[key]
            dim_tag = dim_tags[dim_tag_index]

            if isinstance(dim_tag, ComputedStrideArrayDimTag):
                if stride_so_far is None:
                    raise LoopyError("unable to determine fixed stride "
                            "for axis %d because it is nested outside of "
                            "an 'auto' stride axis"
                            % dim_tag_index)

                new_dim_tags[dim_tag_index] = FixedStrideArrayDimTag(
                        stride_so_far,
                        target_axis=dim_tag.target_axis,
                        layout_nesting_level=dim_tag.layout_nesting_level)

                if shape_is_unknown:
                    # unable to normalize without known shape
                    return None

                shape_axis = shape[dim_tag_index]
                if shape_axis is None:
                    stride_so_far = None
                else:
                    stride_so_far *= shape_axis

                # Padding rounds the running stride up to the next multiple
                # of pad_to, i.e. ceil(stride / pad_to) * pad_to; the second
                # factor must be pad_to, not the stride. An unknown stride
                # cannot be padded and stays unknown.
                if dim_tag.pad_to is not None and stride_so_far is not None:
                    from pytools import div_ceil
                    stride_so_far = (
                            div_ceil(stride_so_far, dim_tag.pad_to)
                            * dim_tag.pad_to)

            elif isinstance(dim_tag, FixedStrideArrayDimTag):
                stride_so_far = dim_tag.stride

                if stride_so_far is lp.auto:
                    stride_so_far = None

            else:
                raise TypeError("internal error in dim_tag conversion")

    # }}}

    return new_dim_tags