Beispiel #1
0
 def _GpuKernelBase_version(self):
     """Return a version tuple tied to the default GPU context.

     The tuple consists of the constant ``2`` followed by the ``kind``
     and ``devname`` attributes of the context returned by
     ``gpuarray.get_default_context()``.
     """
     default_ctx = gpuarray.get_default_context()
     version = (2, default_ctx.kind, default_ctx.devname)
     return version
Beispiel #2
0
 def _GpuKernelBase_version(self):
     """Build the version key from the default GPU context.

     Returns
     -------
     tuple
         ``(2, kind, devname)`` where ``kind`` and ``devname`` are read
         from the context given by ``gpuarray.get_default_context()``.
     """
     context = gpuarray.get_default_context()
     kind, devname = context.kind, context.devname
     return (2, kind, devname)
Beispiel #3
0
def ufunc21(name, a, b, out=None, context=None):
    """Call a ufunc with 2 inputs and 1 output.

    Parameters
    ----------
    name : str
        Name of the NumPy ufunc.
    a, b : `array-like`
        Input arrays to which the ufunc should be applied.
    out : `pygpu.gpuarray.GpuArray`, optional
        Array in which to store the result.
    context : `pygpu.gpuarray.GpuContext`, optional
        Use this GPU context to evaluate the GPU kernel. For ``None``,
        if no GPU array is among the provided parameters, a default
        GPU context must have been set.

    Returns
    -------
    out : `pygpu.gpuarray.GpuArray`
        Result of the computation. If ``out`` was given, the returned
        object is a reference to it.
        The type of the returned array is `pygpu._array.ndgpuarray` if

        - no GPU array was among the parameters or
        - one of the parameters had type `pygpu._array.ndgpuarray`.

    Raises
    ------
    ValueError
        If the GPU arrays among ``a``, ``b`` and ``out`` (and an
        explicitly given ``context``) do not all use the same context.
    TypeError
        If no valid signature is found for the inputs, or if negation
        (``-``) is requested for a boolean array with NumPy >= 1.13.
    NotImplementedError
        If the metadata entry for ``name`` provides neither a C operator,
        a C function nor an ``oper`` template.

    Notes
    -----
    In the current implementation, only ``a`` is passed to the generated
    kernel. ``b`` takes part solely in the resolution of the GPU context
    and the array class, and the metadata of ``name`` is asserted to
    describe 1 input and 1 output.
    """
    # Lazy import to avoid circular dependency
    from pygpu._array import ndgpuarray

    # --- Prepare input array --- #

    # Determine GPU context and class. Use the "highest" class present in the
    # inputs, defaulting to `ndgpuarray`
    found_gpu_array = False
    cls = None
    for ary in (a, b, out):
        if not isinstance(ary, GpuArray):
            continue
        if context is not None and ary.context != context:
            raise ValueError('cannot mix contexts')
        context = ary.context
        # A plain `GpuArray` class is replaced by whatever class a later
        # GPU array has; any other class found first is kept.
        if cls is None or cls is GpuArray:
            cls = ary.__class__
        found_gpu_array = True

    if not found_gpu_array:
        # No GPU array among the parameters: the result class is
        # `ndgpuarray` (as documented), whether the context was passed
        # explicitly or has to be taken from the default.
        if context is None:
            context = get_default_context()
        cls = ndgpuarray

    # Cast input to `GpuArray` of the right dtype if necessary
    # TODO: figure out what to do here exactly (scalars and such)
    a_is_array = isinstance(a, (GpuArray, numpy.ndarray))
    if not a_is_array:
        # Non-array input (e.g. scalars, sequences) has no dtype yet, so it
        # must be converted before a signature can be looked up.
        a = array(a, context=context, cls=cls)

    # For array input, the signature is determined before the conversion
    # to avoid creating an intermediate GPU array.
    sig = find_smallest_valid_signature(name, (a, ), (out, ))
    if not sig:
        raise TypeError('ufunc {!r} not supported for the input types, '
                        'and the inputs could not be safely coerced'
                        ''.format(name))

    # Both typecodes are needed below, regardless of the kind of input.
    tc_in, tc_out = sig.split('->')

    if a_is_array:
        # Preserve Fortran ordering only for arrays that are exclusively
        # F-contiguous.
        if a.flags.f_contiguous and not a.flags.c_contiguous:
            order = 'F'
        else:
            order = 'C'
        a = array(a,
                  dtype=tc_in,
                  copy=False,
                  order=order,
                  context=context,
                  cls=cls)
    elif a.dtype < tc_in:
        # Upcast input if necessary
        a = a.astype(tc_in)

    # Create output array if not provided
    if out is None:
        out = empty(a.shape, dtype=tc_out, context=context, cls=cls)

    # --- Generate code strings for GpuElemwise --- #

    # C dtypes for casting
    c_dtype_in = dtype_to_ctype(tc_in)
    c_dtype_out = dtype_to_ctype(tc_out)

    meta = ufunc_metadata[name]
    # NOTE(review): these checks describe a unary ufunc although the
    # function takes two inputs -- confirm the intended metadata for
    # binary ufuncs before relaxing them.
    assert meta['nin'] == 1
    assert meta['nout'] == 1

    # Create `oper` string
    if meta['c_op'] is not None:
        # Case 1: unary operator
        unop = meta['c_op']
        # `numpy.bool_` is used since the plain `numpy.bool` alias is not
        # available in every NumPy release.
        if a.dtype == numpy.bool_ and unop == '-':
            if parse_version(numpy.__version__) >= parse_version('1.13'):
                # Numpy >= 1.13 raises a TypeError
                raise TypeError(
                    'negation of boolean arrays is not supported, use '
                    '`logical_not` instead')
            else:
                # Warn and remap to logical not
                warnings.warn(
                    'using negation (`-`) with boolean arrays is '
                    'deprecated, use `logical_not` (`~`) instead; '
                    'the current behavior will be changed along '
                    "with NumPy's", FutureWarning)
                unop = '!'
        oper = 'out = ({odt}) {}a'.format(unop, odt=c_dtype_out)
        preamble = ''

    elif meta['c_func'] is not None:
        # Case 2: C function
        c_func = meta['c_func']

        if name in ('abs', 'absolute'):
            # Special case
            out_kind = numpy.dtype(tc_out).kind
            if out_kind == 'u':
                # Shortcut for abs() with unsigned int. This also fixes a CUDA
                # quirk that makes abs() crash with unsigned int input.
                out[:] = a
                return out
            elif out_kind == 'f':
                c_func = 'fabs'
            else:
                c_func = 'abs'

        oper = 'out = ({odt}) {}(a)'.format(c_func, odt=c_dtype_out)
        preamble_tpl = mako.template.Template(meta['oper_preamble_tpl'])
        preamble = preamble_tpl.render(idt=c_dtype_in, odt=c_dtype_out)

    elif meta['oper_fmt'] is not None:
        # Case 3: custom implementation with `oper` template
        oper = meta['oper_fmt'].format(idt=c_dtype_in, odt=c_dtype_out)
        preamble_tpl = mako.template.Template(meta['oper_preamble_tpl'])
        preamble = preamble_tpl.render(idt=c_dtype_in, odt=c_dtype_out)

    else:
        # Case 4: not implemented
        raise NotImplementedError('ufunc {!r} not implemented'.format(name))

    # --- Generate and run GpuElemwise kernel --- #

    a_arg = as_argument(a, 'a', read=True)
    args = [arg('out', out.dtype, write=True), a_arg]

    ker = GpuElemwise(context, oper, args, preamble=preamble)
    ker(out, a)
    return out