def with_types(self, arg_id_to_dtype, callables_table):
    """Specialize this ``log2`` callable for the dtype of argument ``0``.

    :arg arg_id_to_dtype: mapping from argument id to a type object that
        exposes ``numpy_dtype``.
    :arg callables_table: handed back unchanged.
    :returns: a tuple ``(callable, callables_table)``. If the type of
        argument ``0`` is missing or *None*, the callable is only a copy
        carrying *arg_id_to_dtype*; otherwise ids ``0`` and ``-1`` are both
        set to the (possibly promoted) floating point type.
    :raises TypeError: if the dtype is neither ``float32`` nor ``float64``
        after integer promotion.
    """
    if 0 not in arg_id_to_dtype or arg_id_to_dtype[0] is None:
        # the types provided aren't mature enough to specialize the
        # callable
        return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)

    dtype = arg_id_to_dtype[0].numpy_dtype

    if dtype.kind in ("u", "i"):
        # Signed and unsigned integers are cast to float32. This has to be a
        # dtype *instance*: the scalar class np.float32 has no ``.type``
        # attribute, so the comparison below would raise AttributeError.
        dtype = np.dtype(np.float32)

    if dtype.type == np.float32:
        name_in_target = "log2f"
    elif dtype.type == np.float64:
        name_in_target = "log2"
    else:
        raise TypeError(f"log2: unexpected type {dtype}")

    from loopy.types import NumpyType

    return (
        self.copy(
            name_in_target=name_in_target,
            arg_id_to_dtype={0: NumpyType(dtype), -1: NumpyType(dtype)},
        ),
        callables_table,
    )
def map_constant(self, expr):
    """Return a one-element list holding the type deduced for constant *expr*.

    Integers get the first of ``int32``/``int64`` whose range contains the
    value. Objects carrying a ``dtype`` attribute and :class:`numpy.number`
    instances keep their own size. Other floating point constants are taken
    to be ``float32``; other complex constants are taken to be ``complex64``
    only when that conversion compares equal to the ``complex128`` one.

    :raises TypeInferenceFailure: for integers outside the ``int64`` range,
        for complex constants that fail the ``complex64`` comparison, and
        for constants of any other kind.
    """
    if is_integer(expr):
        for int_type in (np.int32, np.int64):
            limits = np.iinfo(int_type)
            if limits.min <= expr <= limits.max:
                return [NumpyType(np.dtype(int_type))]
        # Neither candidate range contained the value.
        raise TypeInferenceFailure("integer constant '%s' too large" % expr)

    dt = np.asarray(expr).dtype

    if hasattr(expr, "dtype"):
        return [NumpyType(expr.dtype)]

    if isinstance(expr, np.number):
        # Numpy types are sized
        return [NumpyType(np.dtype(type(expr)))]

    if dt.kind == "f":
        # deduce the smaller type by default
        return [NumpyType(np.dtype(np.float32))]

    if dt.kind != "c":
        raise TypeInferenceFailure("Cannot deduce type of constant '%s'" % expr)

    if np.complex64(expr) == np.complex128(expr):
        # (COMPLEX_GUESS_LOGIC)
        # No precision is lost by 'guessing' single precision, use that.
        # This at least covers simple cases like '1j'.
        return [NumpyType(np.dtype(np.complex64))]

    # Codegen for complex types depends on exactly correct types.
    # Refuse temptation to guess.
    raise TypeInferenceFailure("Complex constant '%s' needs to "
                               "be sized for type inference " % expr)
def with_types(self, arg_id_to_dtype, callables_table):
    """Specialize this single-argument callable for the dtype of argument ``0``.

    :arg arg_id_to_dtype: mapping from argument id to a type object exposing
        ``numpy_dtype``; only the ids ``-1`` and ``0`` are accepted.
    :arg callables_table: handed back unchanged.
    :returns: a tuple ``(callable, callables_table)``.
    :raises LoopyError: if any id lies outside ``[-1, 0]``.
    :raises LoopyTypeError: if the argument dtype is complex.
    """
    name = self.name

    for arg_id in arg_id_to_dtype:
        if arg_id < -1 or arg_id > 0:
            raise LoopyError(f"'{name}' can take only one argument.")

    if 0 not in arg_id_to_dtype or arg_id_to_dtype[0] is None:
        # the types provided aren't mature enough to specialize the callable
        immature = self.copy(arg_id_to_dtype=arg_id_to_dtype)
        return immature, callables_table

    dtype = arg_id_to_dtype[0].numpy_dtype

    if dtype.kind == "c":
        raise LoopyTypeError(f"{name} does not support type {dtype}")
    if dtype.kind in ("u", "i"):
        # ints and unsigned casted to float32
        dtype = np.float32

    specialized = self.copy(
        name_in_target=name,
        arg_id_to_dtype={0: NumpyType(dtype), -1: NumpyType(dtype)},
    )
    return specialized, callables_table
def c_symbol_mangler(kernel, name):
    """Return ``(type, name)`` for the C symbols handled here, else *None*.

    ``NAN`` is typed as ``float32``; ``INT_MAX`` and ``INT_MIN`` as ``int32``.
    *kernel* is not used.
    """
    # float NAN as defined in C99 standard
    if name == "NAN":
        np_type = np.float32
    elif name in ("INT_MAX", "INT_MIN"):
        np_type = np.int32
    else:
        return None

    return NumpyType(np.dtype(np_type)), name
def with_types(self, arg_id_to_dtype, callables_table):
    """Specialize this random-number callable once arguments 0 and 1 are typed.

    The RNG variant is looked up in ``FUNC_NAMES_TO_RNG`` by ``self.name``.
    Ids ``-2`` and ``0`` receive the counter vector type and id ``1`` the key
    vector type; id ``-1`` depends on whether the name is the variant's full
    name or carries an ``_f32``/``_f64`` suffix.

    :arg arg_id_to_dtype: mapping from argument id to type (or *None*).
    :arg callables_table: handed back unchanged.
    :returns: a tuple ``(callable, callables_table)``.
    """
    types_known = (
        0 in arg_id_to_dtype
        and 1 in arg_id_to_dtype
        and arg_id_to_dtype[0] is not None
        and arg_id_to_dtype[1] is not None
    )
    if not types_known:
        # the types provided aren't mature enough to specialize the
        # callable
        return (self.copy(), callables_table)

    name = self.name
    target = self.target

    rng_variant = FUNC_NAMES_TO_RNG[name]

    from loopy.types import NumpyType

    base_dtype = {32: np.uint32, 64: np.uint64}[rng_variant.bits]
    ctr_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.width)
    key_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.key_width)

    fn = rng_variant.full_name
    if name == fn:
        result_dtype = ctr_dtype
        name_in_target = fn + "_gen"
    elif name == fn + "_f32":
        result_dtype = target.vector_dtype(
            NumpyType(np.float32), rng_variant.width)
        name_in_target = name
    elif name == fn + "_f64":
        result_dtype = target.vector_dtype(
            NumpyType(np.float64), rng_variant.width)
        name_in_target = name
    else:
        # Name matches none of the known spellings: leave types as given.
        return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)

    new_arg_id_to_dtype = {
        -1: result_dtype,
        -2: ctr_dtype,
        0: ctr_dtype,
        1: key_dtype,
    }
    return (
        self.copy(arg_id_to_dtype=new_arg_id_to_dtype,
                  name_in_target=name_in_target),
        callables_table,
    )
def parse_result_type(target, op_type):
    """Turn the reduction type string *op_type* into a :class:`NumpyType`.

    *op_type* is first tried as a numpy dtype specification. If numpy rejects
    it with :class:`TypeError` and the string starts with ``vec_``, the rest
    of the string is looked up via ``target.get_or_register_dtype``.

    :raises LoopyError: if neither interpretation succeeds.
    """
    vec_prefix = "vec_"

    try:
        return NumpyType(np.dtype(op_type))
    except TypeError:
        # Not something numpy understands; try the vector spelling below.
        pass

    if op_type.startswith(vec_prefix):
        element_name = op_type[len(vec_prefix):]
        try:
            return NumpyType(target.get_or_register_dtype(element_name))
        except AttributeError:
            pass

    raise LoopyError("unable to parse reduction type: '%s'" % op_type)
def opencl_function_mangler(kernel, name, arg_dtypes):
    """Mangle calls to the OpenCL built-ins handled here.

    Handles, in this order: integer ``min``/``max`` with two arguments,
    ``dot``, the functions listed in ``_CL_SIMPLE_MULTI_ARG_FUNCTIONS`` and
    the vector literal constructors in ``VECTOR_LITERAL_FUNCS``.

    :arg kernel: only ``kernel.target.vector_dtype`` is used (vector literals).
    :arg name: function identifier; anything that is not a :class:`str`
        yields *None*.
    :arg arg_dtypes: sequence of type objects exposing ``numpy_dtype``.
    :returns: a :class:`CallMangleInfo`, or *None* if the call is not handled.
    :raises LoopyError: on a wrong argument count or on complex arguments
        for a function from ``_CL_SIMPLE_MULTI_ARG_FUNCTIONS``.
    """
    if not isinstance(name, str):
        return None

    # OpenCL has min(), max() for integer types
    if name in ["max", "min"] and len(arg_dtypes) == 2:
        # np.find_common_type was removed in NumPy 2.0; np.result_type on
        # dtype objects performs the same dtype promotion.
        dtype = np.result_type(*[tp.numpy_dtype for tp in arg_dtypes])

        if dtype.kind == "i":
            result_dtype = NumpyType(dtype)
            return CallMangleInfo(
                target_name=name,
                result_dtypes=(result_dtype,),
                arg_dtypes=2 * (result_dtype,))

    if name == "dot":
        # Entry 0 of a numpy field tuple is the field's dtype; the remaining
        # entries (offset and optional title) are not needed.
        scalar_dtype = arg_dtypes[0].numpy_dtype.fields["s0"][0]
        return CallMangleInfo(
            target_name=name,
            result_dtypes=(NumpyType(scalar_dtype),),
            arg_dtypes=(arg_dtypes[0],) * 2)

    if name in _CL_SIMPLE_MULTI_ARG_FUNCTIONS:
        num_args = _CL_SIMPLE_MULTI_ARG_FUNCTIONS[name]
        if len(arg_dtypes) != num_args:
            raise LoopyError("%s takes %d arguments (%d received)"
                             % (name, num_args, len(arg_dtypes)))

        dtype = np.result_type(*[tp.numpy_dtype for tp in arg_dtypes])

        if dtype.kind == "c":
            raise LoopyError("%s does not support complex numbers" % name)

        result_dtype = NumpyType(dtype)
        return CallMangleInfo(
            target_name=name,
            result_dtypes=(result_dtype,),
            arg_dtypes=(result_dtype,) * num_args)

    if name in VECTOR_LITERAL_FUNCS:
        base_tp_name, dtype, count = VECTOR_LITERAL_FUNCS[name]

        if count != len(arg_dtypes):
            return None

        return CallMangleInfo(
            target_name="(%s%d) " % (base_tp_name, count),
            result_dtypes=(
                kernel.target.vector_dtype(NumpyType(dtype), count),),
            arg_dtypes=(NumpyType(dtype),) * count)

    return None
def opencl_symbol_mangler(kernel, name):
    """Return ``(type, name)`` for the OpenCL constants handled here.

    ``FLT_*`` names and ``INFINITY`` are typed ``float32`` and ``DBL_*``
    names ``float64``. ``M_*`` names are ``float32`` when they end in ``_F``
    and ``float64`` otherwise. Any other name yields *None*. *kernel* is not
    used.
    """
    # FIXME: should be more picky about exact names
    if name.startswith("FLT_"):
        np_type = np.float32
    elif name.startswith("DBL_"):
        np_type = np.float64
    elif name.startswith("M_"):
        np_type = np.float32 if name.endswith("_F") else np.float64
    elif name == "INFINITY":
        np_type = np.float32
    else:
        return None

    return NumpyType(np.dtype(np_type)), name
def bessel_mangler(kernel, identifier, arg_dtypes):
    """A function "mangler" to make Bessel functions digestible for
    :mod:`loopy`.

    See argument *function_manglers* to :func:`loopy.make_kernel`.

    Handles ``hank1_01`` and ``bessel_jv_two``. When the relevant argument is
    complex, the ``_complex`` variant of the function is selected and that
    argument is typed ``complex128``; otherwise it is typed ``float64``.

    :returns: a ``CallMangleInfo``, or *None* for any other identifier.
    :raises NotImplementedError: if ``kernel.target`` is not a
        ``PyOpenCLTarget``.
    """
    from loopy.target.pyopencl import PyOpenCLTarget

    if not isinstance(kernel.target, PyOpenCLTarget):
        raise NotImplementedError(
            "Only the PyOpenCLTarget is supported as of now")

    if identifier == "hank1_01":
        if arg_dtypes[0].is_complex():
            identifier = "hank1_01_complex"
            x_type = np.complex128
        else:
            x_type = np.float64

        return lp.CallMangleInfo(
            target_name=identifier,
            result_dtypes=(NumpyType(np.dtype(hank1_01_result_dtype)),),
            arg_dtypes=(NumpyType(np.dtype(x_type)),))

    if identifier == "bessel_jv_two":
        if arg_dtypes[1].is_complex():
            identifier = "bessel_jv_two_complex"
            x_type = np.complex128
        else:
            x_type = np.float64

        return lp.CallMangleInfo(
            target_name=identifier,
            result_dtypes=(NumpyType(np.dtype(bessel_j_two_result_dtype)),),
            arg_dtypes=(
                NumpyType(np.dtype(np.int32)),
                NumpyType(np.dtype(x_type)),
            ))

    return None
def process_dtype(dtype):
    """Normalize *dtype* to a :class:`NumpyType` where possible.

    Numpy scalar classes are first turned into :class:`numpy.dtype`
    instances; dtype instances are then wrapped together with
    ``self.kernel.target``. Anything else is returned untouched.
    """
    is_scalar_class = isinstance(dtype, type) and issubclass(dtype, np.generic)
    converted = np.dtype(dtype) if is_scalar_class else dtype

    if not isinstance(converted, np.dtype):
        return converted

    # `self` comes from the enclosing scope.
    return NumpyType(converted, self.kernel.target)
def pyopencl_function_mangler(target, name, arg_dtypes):
    """Mangle single-argument calls on complex values to ``cfloat_*``/``cdouble_*``.

    Only calls with exactly one complex argument and a string *name* are
    considered. Functions in the first list below keep the argument type as
    the result type; ``real``, ``imag`` and ``abs`` use the type of the real
    part instead. *target* is not used.

    :returns: a :class:`CallMangleInfo`, or *None* if the call is not handled.
    :raises RuntimeError: if the complex type is neither ``complex64`` nor
        ``complex128``.
    """
    if len(arg_dtypes) != 1 or not isinstance(name, str):
        return None

    arg_dtype, = arg_dtypes
    if not arg_dtype.is_complex():
        return None

    if arg_dtype.numpy_dtype == np.complex64:
        tpname = "cfloat"
    elif arg_dtype.numpy_dtype == np.complex128:
        tpname = "cdouble"
    else:
        raise RuntimeError("unexpected complex type '%s'" % arg_dtype)

    if name in ["sqrt", "exp", "log",
                "sin", "cos", "tan",
                "sinh", "cosh", "tanh",
                "conj"]:
        result_dtype = arg_dtype
    elif name in ["real", "imag", "abs"]:
        result_dtype = NumpyType(
            np.dtype(arg_dtype.numpy_dtype.type(0).real))
    else:
        return None

    return CallMangleInfo(
        target_name="%s_%s" % (tpname, name),
        result_dtypes=(result_dtype,),
        arg_dtypes=(arg_dtype,))
def arg_to_dtype_set(self, kwargs):
    """Collect the dtypes of runtime-typed arguments found in *kwargs*.

    :arg kwargs: mapping from argument name to the value passed for it.
    :returns: *None* if ``self.has_runtime_typed_args`` is false. Otherwise a
        :class:`frozenset` of ``(name, NumpyType)`` pairs, one for every
        value that has a ``dtype`` attribute and whose kernel argument has
        no dtype yet.
    """
    if not self.has_runtime_typed_args:
        return None

    from loopy.types import NumpyType
    target = self.kernel.target

    impl_arg_to_arg = self.kernel.impl_arg_to_arg
    arg_to_dtype = {}
    for arg_name, val in kwargs.items():
        arg = impl_arg_to_arg.get(arg_name, None)

        if arg is None:
            # offsets, strides and such
            continue

        if arg.dtype is not None or val is None:
            continue

        try:
            dtype = val.dtype
        except AttributeError:
            # Value carries no dtype; nothing to record for it.
            continue

        arg_to_dtype[arg_name] = NumpyType(dtype, target)

    return frozenset(arg_to_dtype.items())
def with_types(self, arg_id_to_dtype, kernel, callables_table):
    """Specialize this callable once ids ``0 .. len(arg_id_to_dtype) - 1`` are typed.

    Each of those ids is re-wrapped as a :class:`NumpyType` of its
    ``numpy_dtype``; id ``-1`` receives the dtype of id ``0``. *kernel* is
    not used.

    :returns: a tuple ``(callable, callables_table)``; if any of the ids is
        missing or *None*, the callable is only a copy carrying
        *arg_id_to_dtype*.
    """
    dtypes = OrderedDict()

    for arg_id in range(len(arg_id_to_dtype)):
        given = arg_id_to_dtype.get(arg_id)
        if given is None:
            # the types provided aren't mature enough to specialize the
            # callable
            return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                    callables_table)
        dtypes[arg_id] = NumpyType(given.numpy_dtype)

    dtypes[-1] = NumpyType(dtypes[0].dtype)

    specialized = self.copy(name_in_target=self.name_in_target,
                            arg_id_to_dtype=dtypes)
    return (specialized, callables_table)
def get_or_register_dtype(self, names, dtype=None):
    """Forward to ``self.wrapped_registry.get_or_register_dtype``.

    An :class:`AtomicNumpyType` is replaced by a plain :class:`NumpyType` of
    the same underlying ``dtype`` before forwarding; every other value,
    including *None*, is forwarded as is.
    """
    forwarded = dtype

    if dtype is not None:
        from loopy.types import AtomicNumpyType, NumpyType

        if isinstance(dtype, AtomicNumpyType):
            forwarded = NumpyType(dtype.dtype)

    return self.wrapped_registry.get_or_register_dtype(names, forwarded)
def process_dtype(dtype):
    """Normalize *dtype* to a :class:`NumpyType` where possible.

    Numpy scalar classes are first turned into :class:`numpy.dtype`
    instances; dtype instances are then wrapped together with
    ``self.program.target``. Anything else is returned untouched.
    """
    is_scalar_class = isinstance(dtype, type) and issubclass(dtype, np.generic)
    converted = np.dtype(dtype) if is_scalar_class else dtype

    if not isinstance(converted, np.dtype):
        return converted

    from loopy.types import NumpyType

    # `self` comes from the enclosing scope.
    return NumpyType(converted, self.program.target)
def map_lookup(self, expr):
    """Infer the type of the field access *expr*.

    The aggregate is inferred first. An empty result is passed through
    unchanged; otherwise the dtype of field ``expr.name`` of the first
    inferred type is returned as a one-element list.
    """
    aggregate_types = self.rec(expr.aggregate)

    if aggregate_types:
        # Entry 0 of a numpy field tuple is the field's dtype.
        member_dtype = aggregate_types[0].numpy_dtype.fields[expr.name][0]
        return [NumpyType(member_dtype)]

    return aggregate_types
def vector_dtype(self, base, count):
    """Return the *count*-wide vector type whose element type is *base*.

    The table is taken from ``pyopencl.cltypes.vec_types``; if that import
    fails, ``pyopencl.array.vec.types`` is used instead. The result is a
    :class:`NumpyType` bound to this target.
    """
    try:
        import pyopencl.cltypes as cltypes
        type_table = cltypes.vec_types
    except ImportError:
        from pyopencl.array import vec
        type_table = vec.types

    key = (base.numpy_dtype, count)
    return NumpyType(type_table[key], target=self)
def with_types(self, arg_id_to_dtype, callables_table):
    """Return a copy whose id ``-1`` is typed ``int32``.

    Entries of *arg_id_to_dtype* that are *None* are dropped; all others are
    kept as given. *callables_table* is handed back unchanged.
    """
    known_types = {}
    for arg_id, dtype in arg_id_to_dtype.items():
        if dtype is not None:
            known_types[arg_id] = dtype

    known_types[-1] = NumpyType(np.int32)

    return (self.copy(arg_id_to_dtype=known_types), callables_table)
def map_quotient(self, expr):
    """Infer the type of the quotient *expr*.

    If every type inferred for numerator and denominator is integral, the
    result is ``float64``; otherwise the two type sets are merged with
    ``self.combine``.
    """
    n_dtype_set = self.rec(expr.numerator)
    d_dtype_set = self.rec(expr.denominator)

    only_integers = True
    for dtype in n_dtype_set + d_dtype_set:
        if not dtype.is_integral():
            only_integers = False
            break

    if not only_integers:
        return self.combine([n_dtype_set, d_dtype_set])

    # both integers
    return [NumpyType(np.dtype(np.float64))]
def bessel_function_mangler(kernel, name, arg_dtypes):
    """Mangle two-argument ``bessel_j``/``bessel_y`` calls.

    ``bessel_j`` maps to ``bessel_jv`` and ``bessel_y`` to ``bessel_yn``; both
    are typed as ``(int32, float64) -> float64``. *kernel* is not used.

    :arg name: function identifier.
    :arg arg_dtypes: sequence of type objects exposing ``numpy_dtype``.
    :returns: a ``CallMangleInfo``, or *None* for any other name or argument
        count.
    :raises TypeError: if the first argument is not of integer kind.
    """
    target_names = {"bessel_j": "bessel_jv", "bessel_y": "bessel_yn"}

    # The isinstance check keeps unhashable identifiers out of the dict lookup.
    target_name = target_names.get(name) if isinstance(name, str) else None
    if target_name is None or len(arg_dtypes) != 2:
        return None

    n_dtype, _x_dtype = arg_dtypes

    # *technically* takes a float, but let's not worry about that.
    if n_dtype.numpy_dtype.kind != "i":
        # The function name used to be left out of this message, leaving a
        # literal "%s" in it.
        raise TypeError("%s expects an integer first argument" % name)

    from loopy.kernel.data import CallMangleInfo
    from loopy.types import NumpyType

    return CallMangleInfo(
        target_name,
        (NumpyType(np.float64),),
        (NumpyType(np.int32), NumpyType(np.float64)),
    )
def random123_function_mangler(kernel, name, arg_dtypes):
    """Mangle calls to the generators listed in ``FUNC_NAMES_TO_RNG``.

    The arguments are typed as the counter and key vector types of the RNG
    variant. The first result is the counter type for the plain name and a
    ``float32``/``float64`` vector for the ``_f32``/``_f64`` spellings; the
    second result is always the counter type. *arg_dtypes* is not used.

    :returns: a ``CallMangleInfo``, or *None* if *name* is not a known
        generator spelling.
    """
    try:
        rng_variant = FUNC_NAMES_TO_RNG[name]
    except KeyError:
        return None

    from loopy.types import NumpyType
    target = kernel.target

    base_dtype = {32: np.uint32, 64: np.uint64}[rng_variant.bits]
    ctr_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.width)
    key_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.key_width)

    from loopy.kernel.data import CallMangleInfo

    fn = rng_variant.full_name
    if name == fn:
        target_name = fn + "_gen"
        value_dtype = ctr_dtype
    elif name == fn + "_f32":
        target_name = name
        value_dtype = target.vector_dtype(
            NumpyType(np.float32), rng_variant.width)
    elif name == fn + "_f64":
        target_name = name
        value_dtype = target.vector_dtype(
            NumpyType(np.float64), rng_variant.width)
    else:
        return None

    return CallMangleInfo(
        target_name=target_name,
        result_dtypes=(value_dtype, ctr_dtype),
        arg_dtypes=(ctr_dtype, key_dtype))
def dtype_to_type_context(target, dtype):
    """Return the one-letter type context for *dtype*.

    ``"i"`` for integral types, ``"d"`` for ``float64``/``complex128`` and
    ``"f"`` for ``float32``/``complex64``. Vector types (as decided by
    ``target.is_vector_dtype``) use the context of their ``x`` field. *None*
    is returned for everything else.
    """
    from loopy.types import NumpyType

    if dtype.is_integral():
        return "i"

    if isinstance(dtype, NumpyType):
        if dtype.dtype in [np.float64, np.complex128]:
            return "d"
        if dtype.dtype in [np.float32, np.complex64]:
            return "f"

    if not target.is_vector_dtype(dtype):
        return None

    # Recurse on the element type, taken from the vector's "x" field.
    element_dtype = dtype.numpy_dtype.fields["x"][0]
    return dtype_to_type_context(target, NumpyType(element_dtype))
def map_lookup(self, expr):
    """Infer the type of the field access *expr*.

    The aggregate is inferred first. An empty result is passed through
    unchanged; otherwise the dtype of field ``expr.name`` of the first
    inferred type is returned as a one-element list.

    :raises LoopyError: if the aggregate's dtype has no fields, or has no
        field called ``expr.name``.
    """
    agg_result = self.rec(expr.aggregate)
    if not agg_result:
        return agg_result

    numpy_dtype = agg_result[0].numpy_dtype
    fields = numpy_dtype.fields
    if fields is None:
        # Arguments are ordered (attribute, expression) to match the
        # message; they used to be passed the other way around.
        raise LoopyError("cannot look up attribute '%s' in "
                         "non-aggregate expression '%s'"
                         % (expr.name, expr.aggregate))

    try:
        field = fields[expr.name]
    except KeyError:
        raise LoopyError("cannot look up attribute '%s' in "
                         "aggregate expression '%s' of dtype '%s'"
                         % (expr.name, expr.aggregate, numpy_dtype))

    # Entry 0 of a numpy field tuple is the field's dtype.
    dtype = field[0]
    return [NumpyType(dtype)]
def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var,
                       lhs_expr, rhs_expr, lhs_dtype, rhs_type_context):
    """Generate code for an atomic update of *lhs_expr* with *rhs_expr*.

    Only :class:`NumpyType` left-hand sides of dtype ``int32``, ``int64``,
    ``float32`` or ``float64`` are handled. When *rhs_expr* is a ``Sum``, a
    single ``atomicAdd`` statement is emitted whose operand is the sum of
    all children that differ from *lhs_expr*. Otherwise an ``atomicCAS``
    do-while loop is emitted; for floating point types the operands are
    reinterpreted through ``int``/``long`` pointers.

    *lhs_atomicity* and *lhs_var* are not used here.

    :raises NotImplementedError: for any other *lhs_dtype*.
    """
    from pymbolic.primitives import Sum
    from cgen import Statement
    from pymbolic.mapper.stringifier import PREC_NONE

    supported = isinstance(lhs_dtype, NumpyType) and lhs_dtype.numpy_dtype in [
        np.int32, np.int64, np.float32, np.float64]
    if not supported:
        raise NotImplementedError("atomic update for '%s'" % lhs_dtype)

    # atomicAdd
    if isinstance(rhs_expr, Sum):
        ecm = self.get_expression_to_code_mapper(codegen_state)

        remaining_terms = tuple(
            child for child in rhs_expr.children if child != lhs_expr)
        new_rhs_expr = Sum(remaining_terms)
        lhs_expr_code = ecm(lhs_expr)
        rhs_expr_code = ecm(new_rhs_expr)

        return Statement(
            "atomicAdd(&{}, {})".format(lhs_expr_code, rhs_expr_code))

    # Everything else: compare-and-swap loop.
    from cgen import Block, DoWhile, Assign
    from loopy.target.c import POD

    old_val_var = codegen_state.var_name_generator("loopy_old_val")
    new_val_var = codegen_state.var_name_generator("loopy_new_val")

    from loopy.kernel.data import TemporaryVariable
    ecm = codegen_state.expression_to_code_mapper.with_assignments({
        old_val_var: TemporaryVariable(old_val_var, lhs_dtype),
        new_val_var: TemporaryVariable(new_val_var, lhs_dtype),
    })

    lhs_expr_code = ecm(lhs_expr, prec=PREC_NONE, type_context=None)

    from pymbolic.mapper.substitutor import make_subst_func
    from pymbolic import var
    from loopy.symbolic import SubstitutionMapper

    # The right-hand side is evaluated with the old-value temporary standing
    # in for the left-hand side.
    subst = SubstitutionMapper(
        make_subst_func({lhs_expr: var(old_val_var)}))
    rhs_expr_code = ecm(subst(rhs_expr), prec=PREC_NONE,
                        type_context=rhs_type_context,
                        needed_dtype=lhs_dtype)

    cast_str = ""
    old_val = old_val_var
    new_val = new_val_var

    if lhs_dtype.numpy_dtype.kind == "f":
        if lhs_dtype.numpy_dtype == np.float32:
            ctype = "int"
        elif lhs_dtype.numpy_dtype == np.float64:
            ctype = "long"
        else:
            raise AssertionError()

        old_val = "*(%s *) &" % ctype + old_val
        new_val = "*(%s *) &" % ctype + new_val
        cast_str = "(%s *) " % (ctype)

    condition = (
        "atomicCAS("
        "%(cast_str)s&(%(lhs_expr)s), "
        "%(old_val)s, "
        "%(new_val)s"
        ") != %(old_val)s"
        % {
            "cast_str": cast_str,
            "lhs_expr": lhs_expr_code,
            "old_val": old_val,
            "new_val": new_val,
        })

    return Block([
        POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
            old_val_var),
        POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
            new_val_var),
        DoWhile(
            condition,
            Block([
                Assign(old_val_var, lhs_expr_code),
                Assign(new_val_var, rhs_expr_code),
            ])),
    ])
def cuda_with_types(self, arg_id_to_dtype, callables_table):
    """Specialize a CUDA-specific callable for the given argument types.

    Functions listed in ``_CUDA_SPECIFIC_FUNCTIONS`` get every argument and
    the result typed as the promoted dtype of all given arguments. ``dot``
    takes two arguments of the first argument's type and returns the type of
    that type's ``x`` field. Any other name is returned as a copy carrying
    *arg_id_to_dtype*.

    :arg arg_id_to_dtype: mapping from argument id to a type object exposing
        ``numpy_dtype`` (or *None*).
    :arg callables_table: handed back unchanged.
    :returns: a tuple ``(callable, callables_table)`` on every path.
    :raises LoopyError: if an id is outside the function's argument range.
    :raises LoopyTypeError: if an argument is complex.
    """
    name = self.name

    if name in _CUDA_SPECIFIC_FUNCTIONS:
        num_args = _CUDA_SPECIFIC_FUNCTIONS[name]

        # {{{ sanity checks

        for arg_id, dtype in arg_id_to_dtype.items():
            if not -1 <= arg_id < num_args:
                raise LoopyError("%s can take only %d arguments."
                                 % (name, num_args))

            # Go through numpy_dtype, as the promotion below does, rather
            # than reading ``.kind`` off the type object itself.
            if dtype is not None and dtype.numpy_dtype.kind == "c":
                raise LoopyTypeError(
                    f"'{name}' does not support complex arguments.")

        # }}}

        for i in range(num_args):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)

        # np.find_common_type was removed in NumPy 2.0; np.result_type on
        # dtype objects performs the same dtype promotion.
        dtype = np.result_type(*[
            tp.numpy_dtype
            for arg_id, tp in arg_id_to_dtype.items()
            if arg_id >= 0])

        updated_arg_id_to_dtype = {
            arg_id: NumpyType(dtype) for arg_id in range(-1, num_args)}

        return (self.copy(name_in_target=name,
                          arg_id_to_dtype=updated_arg_id_to_dtype),
                callables_table)

    if name == "dot":
        # CUDA dot function:
        # Performs dot product. Input types: vector and return type: scalar.
        for i in range(2):
            if i not in arg_id_to_dtype or arg_id_to_dtype[i] is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)

        input_dtype = arg_id_to_dtype[0]

        # Entry 0 of a numpy field tuple is the field's dtype.
        scalar_dtype = input_dtype.numpy_dtype.fields["x"][0]

        # This branch used to return the bare callable, unlike every other
        # path, which returns (callable, callables_table).
        return (self.copy(arg_id_to_dtype={
                    0: input_dtype,
                    1: input_dtype,
                    -1: NumpyType(scalar_dtype)}),
                callables_table)

    return (self.copy(arg_id_to_dtype=arg_id_to_dtype), callables_table)
def c_math_mangler(target, name, arg_dtypes, modify_name=True):
    """Function mangler for math functions defined in the C standard.

    ``abs``, ``min`` and ``max`` are first renamed to ``fabs``, ``fmin`` and
    ``fmax``. Unary functions are only handled for a single floating point
    argument; ``fmax``, ``fmin`` and ``copysign`` only when the promoted type
    of their two arguments is floating point. *target* is not used.

    :arg name: function identifier; anything that is not a :class:`str`
        yields *None*.
    :arg arg_dtypes: sequence of type objects exposing ``numpy_dtype``.
    :arg modify_name: if true, function names are modified according to the
        floating point type of the arguments (e.g. ``cos`` for double,
        ``cosf`` for float). This should be set to True for C and Cuda,
        False for OpenCL.
    :returns: a :class:`CallMangleInfo`, or *None* if the call is not handled.
    :raises LoopyTypeError: for complex arguments to a binary function, or
        for a floating point type without a matching name suffix.
    """
    if not isinstance(name, str):
        return None

    if name in ["abs", "min", "max"]:
        name = "f" + name

    # unitary functions
    if (name in ["fabs", "acos", "asin", "atan", "cos", "cosh", "sin", "sinh",
                 "tanh", "exp", "log", "log10", "sqrt", "ceil", "floor"]
            and len(arg_dtypes) == 1
            and arg_dtypes[0].numpy_dtype.kind == "f"):

        dtype = arg_dtypes[0].numpy_dtype

        if modify_name:
            if dtype == np.float64:
                pass  # fabs
            elif dtype == np.float32:
                name = name + "f"  # fabsf
            elif dtype == np.longdouble:
                # np.longdouble names the platform's C long double on every
                # platform; np.float128 does not exist everywhere.
                name = name + "l"  # fabsl
            else:
                raise LoopyTypeError("%s does not support type %s"
                                     % (name, dtype))

        return CallMangleInfo(
            target_name=name,
            result_dtypes=arg_dtypes,
            arg_dtypes=arg_dtypes)

    # binary functions
    if (name in ["fmax", "fmin", "copysign"]
            and len(arg_dtypes) == 2):

        # np.find_common_type was removed in NumPy 2.0; np.result_type on
        # dtype objects performs the same dtype promotion.
        dtype = np.result_type(*[tp.numpy_dtype for tp in arg_dtypes])

        if dtype.kind == "c":
            # The function name used to be left out of this message, leaving
            # a literal "%s" in it.
            raise LoopyTypeError("%s does not support complex numbers" % name)

        elif dtype.kind == "f":
            if modify_name:
                if dtype == np.float64:
                    pass  # fmin
                elif dtype == np.float32:
                    name = name + "f"  # fminf
                elif dtype == np.longdouble:
                    name = name + "l"  # fminl
                else:
                    raise LoopyTypeError("%s does not support type %s"
                                         % (name, dtype))

            result_dtype = NumpyType(dtype)
            return CallMangleInfo(
                target_name=name,
                result_dtypes=(result_dtype,),
                arg_dtypes=2 * (result_dtype,))

    return None
def __init__(self, name, dtype=None, shape=(), address_space=None,
             dim_tags=None, offset=0, dim_names=None, strides=None,
             order=None, base_indices=None, storage_shape=None,
             base_storage=None, initializer=None, read_only=False,
             _base_storage_access_may_be_aliasing=False, **kwargs):
    """
    :arg dtype: :class:`loopy.auto` or a :class:`numpy.dtype`
    :arg shape: :class:`loopy.auto` or a shape tuple
    :arg base_indices: :class:`loopy.auto` or a tuple of base indices
    :arg address_space: defaults to :class:`loopy.auto` when *None*. The
        deprecated keyword ``scope`` is accepted as an alias and triggers a
        :class:`DeprecationWarning`.
    :arg initializer: *None* or a :class:`numpy.ndarray`. When given, it
        supplies *dtype* (if that is ``auto`` or *None*) and *shape* (if
        that is ``auto``), and requires ``offset == 0`` and
        ``read_only=True``.

    :raises ValueError: if both ``scope`` and *address_space* are given.
    :raises LoopyError: on any inconsistent combination of *initializer*,
        *offset*, *dtype*, *read_only*, *base_storage* and
        *_base_storage_access_may_be_aliasing*.
    """
    scope = kwargs.pop("scope", None)
    if scope is not None:
        warn("Passing 'scope' is deprecated. Use 'address_space' instead.",
             DeprecationWarning, stacklevel=2)

        if address_space is not None:
            raise ValueError("only one of 'scope' and 'address_space' "
                             "may be specified")
        address_space = scope

    del scope

    # None means "not specified". A second `address_space is None` check
    # used to follow this default; it could never fire and was removed.
    if address_space is None:
        address_space = auto

    if initializer is None:
        pass
    elif isinstance(initializer, np.ndarray):
        if offset != 0:
            raise LoopyError(
                "temporary variable '%s': "
                "offset must be 0 if initializer specified"
                % name)

        from loopy.types import NumpyType, to_loopy_type
        if dtype is auto or dtype is None:
            dtype = NumpyType(initializer.dtype)
        elif to_loopy_type(dtype) != to_loopy_type(initializer.dtype):
            raise LoopyError(
                "temporary variable '%s': "
                "dtype of initializer does not match "
                "dtype of array."
                % name)

        if shape is auto:
            shape = initializer.shape
    else:
        raise LoopyError(
            "temporary variable '%s': "
            "initializer must be None or a numpy array"
            % name)

    if order is None:
        order = "C"

    if base_indices is None:
        base_indices = (0,) * len(shape)

    if not read_only and initializer is not None:
        raise LoopyError(
            "temporary variable '%s': "
            "read-write variables with initializer "
            "are not currently supported "
            "(did you mean to set read_only=True?)"
            % name)

    if base_storage is not None and initializer is not None:
        raise LoopyError(
            "temporary variable '%s': "
            "base_storage and initializer are "
            "mutually exclusive"
            % name)

    if base_storage is None and _base_storage_access_may_be_aliasing:
        raise LoopyError(
            "temporary variable '%s': "
            "_base_storage_access_may_be_aliasing option, but no "
            "base_storage given!"
            % name)

    ArrayBase.__init__(
        self, name=intern(name),
        dtype=dtype, shape=shape, strides=strides,
        dim_tags=dim_tags, offset=offset, dim_names=dim_names,
        order=order,
        base_indices=base_indices,
        address_space=address_space,
        storage_shape=storage_shape,
        base_storage=base_storage,
        initializer=initializer,
        read_only=read_only,
        _base_storage_access_may_be_aliasing=(
            _base_storage_access_may_be_aliasing),
        **kwargs)
def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var,
                       lhs_expr, rhs_expr, lhs_dtype, rhs_type_context):
    """Generate a compare-and-exchange loop that atomically updates *lhs_expr*.

    Only :class:`NumpyType` left-hand sides of dtype ``int32``, ``int64``,
    ``float32`` or ``float64`` are handled. 4-byte types use
    ``atomic_cmpxchg`` and 8-byte types ``atom_cmpxchg``. For floating point
    types the operands are reinterpreted through ``int``/``long`` pointers
    qualified with the address space of *lhs_var*.

    *lhs_atomicity* is not used here.

    :raises LoopyError: if the item size is neither 4 nor 8, or if a
        floating point *lhs_var* is not an ``ArrayArg`` or
        ``TemporaryVariable`` in the global or local address space.
    :raises NotImplementedError: for any other *lhs_dtype*.
    """
    from pymbolic.mapper.stringifier import PREC_NONE

    # FIXME: Could detect operations, generate atomic_{add,...} when
    # appropriate.

    if not (isinstance(lhs_dtype, NumpyType)
            and lhs_dtype.numpy_dtype in [
                np.int32, np.int64, np.float32, np.float64]):
        raise NotImplementedError("atomic update for '%s'" % lhs_dtype)

    from cgen import Block, DoWhile, Assign
    from loopy.target.c import POD

    old_val_var = codegen_state.var_name_generator("loopy_old_val")
    new_val_var = codegen_state.var_name_generator("loopy_new_val")

    from loopy.kernel.data import (
        AddressSpace, ArrayArg, TemporaryVariable)
    ecm = codegen_state.expression_to_code_mapper.with_assignments({
        old_val_var: TemporaryVariable(old_val_var, lhs_dtype),
        new_val_var: TemporaryVariable(new_val_var, lhs_dtype),
    })

    lhs_expr_code = ecm(lhs_expr, prec=PREC_NONE, type_context=None)

    from pymbolic.mapper.substitutor import make_subst_func
    from pymbolic import var
    from loopy.symbolic import SubstitutionMapper

    # The right-hand side is evaluated with the old-value temporary standing
    # in for the left-hand side.
    subst = SubstitutionMapper(
        make_subst_func({lhs_expr: var(old_val_var)}))
    rhs_expr_code = ecm(subst(rhs_expr), prec=PREC_NONE,
                        type_context=rhs_type_context,
                        needed_dtype=lhs_dtype)

    if lhs_dtype.numpy_dtype.itemsize == 4:
        func_name = "atomic_cmpxchg"
    elif lhs_dtype.numpy_dtype.itemsize == 8:
        func_name = "atom_cmpxchg"
    else:
        raise LoopyError("unexpected atomic size")

    cast_str = ""
    old_val = old_val_var
    new_val = new_val_var

    if lhs_dtype.numpy_dtype.kind == "f":
        if lhs_dtype.numpy_dtype == np.float32:
            ctype = "int"
        elif lhs_dtype.numpy_dtype == np.float64:
            ctype = "long"
        else:
            # Raised explicitly: a bare `assert` disappears under -O and
            # would leave `ctype` unbound.
            raise AssertionError()

        var_kind = None
        if isinstance(lhs_var, (ArrayArg, TemporaryVariable)):
            if lhs_var.address_space == AddressSpace.GLOBAL:
                var_kind = "__global"
            elif lhs_var.address_space == AddressSpace.LOCAL:
                var_kind = "__local"

        if var_kind is None:
            # The template used to have one placeholder for two arguments,
            # so formatting it raised TypeError instead of this error.
            raise LoopyError("unexpected kind of variable '%s' in "
                             "atomic operation: %s"
                             % (lhs_var.name, type(lhs_var).__name__))

        old_val = "*(%s *) &" % ctype + old_val
        new_val = "*(%s *) &" % ctype + new_val
        cast_str = "(%s %s *) " % (var_kind, ctype)

    return Block([
        POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
            old_val_var),
        POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
            new_val_var),
        DoWhile(
            "%(func_name)s("
            "%(cast_str)s&(%(lhs_expr)s), "
            "%(old_val)s, "
            "%(new_val)s"
            ") != %(old_val)s"
            % {
                "func_name": func_name,
                "cast_str": cast_str,
                "lhs_expr": lhs_expr_code,
                "old_val": old_val,
                "new_val": new_val,
            },
            Block([
                Assign(old_val_var, lhs_expr_code),
                Assign(new_val_var, rhs_expr_code),
            ])),
    ])
def vector_dtype(self, base, count):
    """Return the *count*-wide vector type whose element type is *base*.

    The numpy dtype is looked up in ``vec.types`` under the key
    ``(base.numpy_dtype, count)`` and wrapped in a :class:`NumpyType` bound
    to this target.
    """
    key = (base.numpy_dtype, count)
    vector_numpy_dtype = vec.types[key]
    return NumpyType(vector_numpy_dtype, target=self)
def vector_dtype(self, base, count):
    """Return the *count*-wide vector type whose element type is *base*.

    The numpy dtype is looked up in ``pyopencl.array.vec.types`` under the
    key ``(base.numpy_dtype, count)`` and wrapped in a :class:`NumpyType`
    bound to this target.
    """
    from pyopencl.array import vec

    key = (base.numpy_dtype, count)
    vector_numpy_dtype = vec.types[key]
    return NumpyType(vector_numpy_dtype, target=self)