def _GpuKernelBase_version(self):
    """Return a version tuple bound to the default GPU context.

    The tuple consists of the constant ``2`` followed by the ``kind`` and
    ``devname`` attributes of the context returned by
    ``gpuarray.get_default_context()``. ``self`` is not read.
    """
    default_ctx = gpuarray.get_default_context()
    version = (2, default_ctx.kind, default_ctx.devname)
    return version
def ufunc21(name, a, b, out=None, context=None):
    """Evaluate the ufunc ``name`` on the GPU and return the result.

    NOTE(review): despite the function name and the ``b`` parameter, the
    body only builds a one-input kernel from ``a`` (it asserts
    ``nin == 1`` on the ufunc metadata). ``b`` currently takes part only
    in GPU context/class detection. Confirm whether a real two-input
    implementation is intended here.

    Parameters
    ----------
    name : str
        Name of the NumPy ufunc.
    a : `array-like`
        Input array to which the ufunc is applied.
    b : `array-like`
        Only inspected for GPU context and array class detection; it is
        not passed to the generated kernel.
    out : `pygpu.gpuarray.GpuArray`, optional
        Array in which to store the result.
    context : `pygpu.gpuarray.GpuContext`, optional
        Use this GPU context to evaluate the GPU kernel. For ``None``,
        if no GPU array is among the provided parameters, a default
        GPU context must have been set.

    Returns
    -------
    out : `pygpu.gpuarray.GpuArray`
        Result of the computation. If ``out`` was given, the returned
        object is a reference to it.
        The type of the returned array is `pygpu._array.ndgpuarray` if

        - no GPU array was among the parameters or
        - one of the parameters had type `pygpu._array.ndgpuarray`.

    Raises
    ------
    ValueError
        If a GPU array among ``a``, ``b``, ``out`` lives in a context
        different from ``context`` (or from an earlier GPU array).
    TypeError
        If no valid signature is found for the input, or if negation is
        requested for a boolean array with NumPy >= 1.13.
    NotImplementedError
        If the ufunc metadata provides neither a C operator, a C function
        nor an ``oper`` template.
    """
    # Lazy import to avoid circular dependency
    from pygpu._array import ndgpuarray

    # --- Prepare input array --- #

    # Determine GPU context and class. Use the "highest" class present in the
    # inputs, defaulting to `ndgpuarray`
    gpu_array_seen = False
    cls = None
    for ary in (a, b, out):
        if not isinstance(ary, GpuArray):
            continue
        if context is not None and ary.context != context:
            raise ValueError('cannot mix contexts')
        context = ary.context
        if cls is None or cls == GpuArray:
            cls = ary.__class__
        gpu_array_seen = True

    if not gpu_array_seen:
        if context is None:
            context = get_default_context()
        # Default the class whenever no GPU array was given, also when the
        # caller passed `context` explicitly; otherwise `cls` would stay
        # `None` and the documented `ndgpuarray` default would not apply.
        cls = ndgpuarray

    # Cast input to `GpuArray` of the right dtype if necessary
    # TODO: figure out what to do here exactly (scalars and such)
    a_is_array = isinstance(a, (GpuArray, numpy.ndarray))
    if not a_is_array:
        # Non-array input must be converted first so that a signature can
        # be determined from the resulting array.
        a = array(a, context=context, cls=cls)

    sig = find_smallest_valid_signature(name, (a, ), (out, ))
    if not sig:
        raise TypeError('ufunc {!r} not supported for the input types, '
                        'and the inputs could not be safely coerced'
                        ''.format(name))
    tc_in, tc_out = sig.split('->')

    if a_is_array:
        # The signature was determined before the conversion to avoid
        # creating an intermediate GPU array; preserve Fortran layout only
        # for arrays that are F- but not C-contiguous.
        if a.flags.f_contiguous and not a.flags.c_contiguous:
            order = 'F'
        else:
            order = 'C'
        a = array(a, dtype=tc_in, copy=False, order=order,
                  context=context, cls=cls)

    # Upcast input if necessary
    if a.dtype < tc_in:
        a = a.astype(tc_in)

    # Create output array if not provided
    if out is None:
        out = empty(a.shape, dtype=tc_out, context=context, cls=cls)

    # --- Generate code strings for GpuElemwise --- #

    # C dtypes for casting
    c_dtype_in = dtype_to_ctype(tc_in)
    c_dtype_out = dtype_to_ctype(tc_out)

    meta = ufunc_metadata[name]
    assert meta['nin'] == 1
    assert meta['nout'] == 1

    # Create `oper` string
    if meta['c_op'] is not None:
        # Case 1: unary operator
        unop = meta['c_op']
        # `numpy.bool_` instead of the `numpy.bool` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        if a.dtype == numpy.bool_ and unop == '-':
            if parse_version(numpy.__version__) >= parse_version('1.13'):
                # Numpy >= 1.13 raises a TypeError
                raise TypeError(
                    'negation of boolean arrays is not supported, use '
                    '`logical_not` instead')
            else:
                # Warn and remap to logical not
                warnings.warn(
                    'using negation (`-`) with boolean arrays is '
                    'deprecated, use `logical_not` (`~`) instead; '
                    'the current behavior will be changed along '
                    "with NumPy's", FutureWarning)
                unop = '!'
        oper = 'out = ({odt}) {}a'.format(unop, odt=c_dtype_out)
        preamble = ''

    elif meta['c_func'] is not None:
        # Case 2: C function
        c_func = meta['c_func']

        if name in ('abs', 'absolute'):
            # Special case
            out_kind = numpy.dtype(tc_out).kind
            if out_kind == 'u':
                # Shortcut for abs() with unsigned int. This also fixes a CUDA
                # quirk that makes abs() crash with unsigned int input.
                out[:] = a
                return out
            elif out_kind == 'f':
                c_func = 'fabs'
            else:
                c_func = 'abs'

        oper = 'out = ({odt}) {}(a)'.format(c_func, odt=c_dtype_out)
        preamble_tpl = mako.template.Template(meta['oper_preamble_tpl'])
        preamble = preamble_tpl.render(idt=c_dtype_in, odt=c_dtype_out)

    elif meta['oper_fmt'] is not None:
        # Case 3: custom implementation with `oper` template
        oper = meta['oper_fmt'].format(idt=c_dtype_in, odt=c_dtype_out)
        preamble_tpl = mako.template.Template(meta['oper_preamble_tpl'])
        preamble = preamble_tpl.render(idt=c_dtype_in, odt=c_dtype_out)

    else:
        # Case 4: not implemented
        raise NotImplementedError('ufunc {!r} not implemented'.format(name))

    # --- Generate and run GpuElemwise kernel --- #

    a_arg = as_argument(a, 'a', read=True)
    args = [arg('out', out.dtype, write=True), a_arg]

    ker = GpuElemwise(context, oper, args, preamble=preamble)
    ker(out, a)
    return out