Example #1
def local_gpua_advanced_incsubtensor(node):

    # This optimization is disabled if CUDA is not active
    if pygpu.get_default_context().kind != "cuda":
        return None

    x, y, ilist = node.inputs

    # GPU ops need both inputs to have the same dtype
    if x.type.dtype != y.type.dtype:
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.dtype != dtype:
            x = tensor.cast(x, dtype)
        if y.type.dtype != dtype:
            y = tensor.cast(y, dtype)

    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties

    compute_capability = device_properties(active_device_no)['major']

    if compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
        return [GpuAdvancedIncSubtensor1(
                set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
    else:
        return [GpuAdvancedIncSubtensor1_dev20(
                set_instead_of_inc=set_instead_of_inc)(x, y, ilist)]
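
The dtype-matching branch above upcasts both inputs to a common dtype before building the GPU op. A minimal CPU sketch of the same pattern, using NumPy's np.promote_types as a stand-in for scalar.upcast (an assumption; Theano's upcast rules may differ in mixed int/float cases):

import numpy as np

def match_dtypes(x, y):
    # Cast x and y to their common dtype, mirroring the upcast branch
    # in the optimizer above.
    if x.dtype != y.dtype:
        dtype = np.promote_types(x.dtype, y.dtype)
        if x.dtype != dtype:
            x = x.astype(dtype)
        if y.dtype != dtype:
            y = y.astype(dtype)
    return x, y

x, y = match_dtypes(np.zeros(3, 'float32'), np.zeros(3, 'float64'))
assert x.dtype == y.dtype == np.dtype('float64')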
Example #2
 def c_header_dirs(self):
     if pygpu.get_default_context().kind == 'opencl':
         raise MethodNotDefined('cuda only')
     cuda_root = config.cuda.root
     if cuda_root:
         return [os.path.join(cuda_root, 'include')]
     else:
         return []
Example #3
 def c_header_dirs(self):
     if pygpu.get_default_context().kind == "opencl":
         raise MethodNotDefined("cuda only")
     cuda_root = config.cuda.root
     res = [os.path.dirname(__file__)]
     if cuda_root:
         res.append(os.path.join(cuda_root, "include"))
     return res
Example #4
 def generate_kernel(self, node, odtype, redux):
     if isinstance(self.scalar_op, scalar.basic.Add):
         reduce_expr = "a + b"
     elif isinstance(self.scalar_op, scalar.basic.Mul):
         reduce_expr = "a * b"
     else:
         raise NotImplementedError("only Add and Mul are supported")
     return ReductionKernel(pygpu.get_default_context(), odtype,
                            self.scalar_op.identity, reduce_expr, redux,
                            arguments=[make_argument(node.inputs[0], 'a')],
                            init_nd=node.inputs[0].ndim)
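
For reference, a CPU analogue of the two reductions this method can generate, assuming scalar.basic.Add carries identity 0 and scalar.basic.Mul identity 1 (the value passed as self.scalar_op.identity above):

import numpy as np

def cpu_reduce(x, op_name):
    # Mirrors reduce_expr "a + b" / "a * b" with the matching identity.
    if op_name == 'add':
        return np.add.reduce(x, axis=None)       # identity 0
    elif op_name == 'mul':
        return np.multiply.reduce(x, axis=None)  # identity 1
    else:
        raise NotImplementedError("only Add and Mul are supported")

assert cpu_reduce(np.arange(1, 5), 'add') == 10
assert cpu_reduce(np.arange(1, 5), 'mul') == 24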
Example #5
def local_gpua_advanced_incsubtensor(node):

    # This optimization is disabled if CUDA is not active
    if pygpu.get_default_context().kind != "cuda":
        return None

    x, y = node.inputs[0:2]
    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties

    compute_capability = device_properties(active_device_no)["major"]

    if compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
        return GpuAdvancedIncSubtensor1(set_instead_of_inc=set_instead_of_inc)
    else:
        return GpuAdvancedIncSubtensor1_dev20(set_instead_of_inc=set_instead_of_inc)
Example #6
def local_gpua_advanced_incsubtensor(node):

    # This optimization is disabled if CUDA is not active
    if pygpu.get_default_context().kind != "cuda":
        return None

    x, y = node.inputs[0:2]
    set_instead_of_inc = node.op.set_instead_of_inc
    active_device_no = theano.sandbox.cuda.active_device_number()
    device_properties = theano.sandbox.cuda.device_properties

    compute_capability = device_properties(active_device_no)['major']

    if compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
        return GpuAdvancedIncSubtensor1(set_instead_of_inc=set_instead_of_inc)
    else:
        return GpuAdvancedIncSubtensor1_dev20(
            set_instead_of_inc=set_instead_of_inc)
Example #7
def varlp_moreau_integrand_gpuary(abs_f, p, sigma, num_newton_iter, out=None):
    """Integrand of the variable Lp Moreau envelope, GpuArray version.

    Parameters
    ----------
    abs_f : `array-like`
        Magnitude of the input function (scalar or vectorial) to the
        functional.
    p : `array-like`
        Spatially varying exponent of the Lp modular. Must have same
        shape and dtype as ``abs_f``.
    sigma : positive float
        Step-size-like parameter of the envelope.
    num_newton_iter : positive int
        Number of Newton iterations to perform for the places where
        ``1 < p < 2``.
    out : `pygpu.gpuarray.GpuArray`, optional
        Array where the result should be stored. Its ``shape`` and
        ``dtype`` must match those of ``abs_f``.

    Returns
    -------
    out : `pygpu.gpuarray.GpuArray`
        Factor for the proximal operator of the convex conjugate.
        If ``out`` was provided, the returned object is a reference to it.

    Examples
    --------
    Exponent ``p = 1`` gives the Huber function of ``abs_f``, that is
    ``abs_f ** 2 / (2 * sigma)`` if ``abs_f <= sigma`` and
    ``abs_f - sigma / 2`` otherwise:

    >>> abs_f = pygpu.gpuarray.array([0.0, 0.5, 1.0, 1.5, 2.0])
    >>> p1 = pygpu.gpuarray.array([1.0, 1.0, 1.0, 1.0, 1.0])
    >>> sigma = 1.0
    >>> result = varlp_moreau_integrand_gpuary(abs_f, p1, sigma,
    ...                                        num_newton_iter=1)
    >>> np.allclose(result, [0, 0.125, 0.5, 1.0, 1.5])
    True
    >>> sigma = 0.5
    >>> result = varlp_moreau_integrand_gpuary(abs_f, p1, sigma,
    ...                                        num_newton_iter=1)
    >>> np.allclose(result, [0, 0.25, 0.75, 1.25, 1.75])
    True

    With ``p = 2`` one gets ``abs_f ** 2 / (1 + 2 * sigma)``:

    >>> p2 = pygpu.gpuarray.array([2.0, 2.0, 2.0, 2.0, 2.0])
    >>> sigma = 0.5
    >>> result = varlp_moreau_integrand_gpuary(abs_f, p2, sigma,
    ...                                        num_newton_iter=1)
    >>> np.allclose(result, [0, 0.125, 0.5, 1.125, 2])
    True
    """
    ctx = pygpu.get_default_context()
    assert ctx is not None

    abs_f = pygpu.gpuarray.array(abs_f, copy=False)
    p = pygpu.gpuarray.array(p, copy=False)
    assert abs_f.dtype in (np.dtype('float32'), np.dtype('float64'))
    if out is None:
        out = abs_f._empty_like_me()
    sigma = float(sigma)
    num_newton_iter = int(num_newton_iter)
    args = [abs_f, p, sigma, num_newton_iter]
    argnames = ['abs_f', 'p', 'sigma', 'num_newton_iter']

    # OpenCL kernels are not implemented for float32 data
    if abs_f.dtype == np.dtype('float32') and ctx.kind == b'opencl':
        raise NotImplementedError("OpenCL kernels currently not supported "
                                  "for 'float32' data type")

    # Render the preamble source from templates
    pre_tpl = mako.template.Template(newton_tpl_str + moreau_integr_tpl_str)
    power = 'powf' if abs_f.dtype == np.dtype('float32') else 'pow'
    minimum = 'fminf' if abs_f.dtype == np.dtype('float32') else 'fmin'
    maximum = 'fmaxf' if abs_f.dtype == np.dtype('float32') else 'fmax'
    preamble = pre_tpl.render(dtype=DTYPE_TO_CTYPE[abs_f.dtype],
                              maximum=maximum, minimum=minimum, power=power)

    # Define the elementwise expression
    expr = 'out = varlp_moreau_integrand(abs_f, p, sigma, num_newton_iter)'
    return elemwise(args, argnames, expr, preamble, out, 'out')
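
As a cross-check of the p = 1 case described in the docstring, a NumPy sketch of the Huber formula that reproduces the first doctest (a CPU illustration only; the GPU kernel handles general p):

import numpy as np

def moreau_integrand_p1(abs_f, sigma):
    # Huber function: abs_f**2 / (2 * sigma) where abs_f <= sigma,
    # abs_f - sigma / 2 elsewhere.
    abs_f = np.asarray(abs_f, dtype=float)
    return np.where(abs_f <= sigma,
                    abs_f ** 2 / (2 * sigma),
                    abs_f - sigma / 2)

assert np.allclose(moreau_integrand_p1([0, 0.5, 1, 1.5, 2], 1.0),
                   [0, 0.125, 0.5, 1.0, 1.5])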
Example #8
def varlp_cc_integrand_gpuary(abs_f, p, out=None):
    """Integrand for the variable Lp convex conjugate, GpuArray version.

    Parameters
    ----------
    abs_f : `array-like`
        Magnitude of the input function (scalar or vectorial) to the
        functional.
    p : `array-like`
        Spatially varying exponent of the Lp modular. Must have same
        shape and dtype as ``abs_f``.
    out : `pygpu.gpuarray.GpuArray`, optional
        Array where the result should be stored. Its ``shape`` and
        ``dtype`` must match those of ``abs_f``.

    Returns
    -------
    out : `pygpu.gpuarray.GpuArray`
        Integrand of the convex conjugate.
        If ``out`` was provided, the returned object is a reference to it.

    Examples
    --------
    Exponent ``p = 1`` gives the indicator of the unit ball:

    >>> abs_f = pygpu.gpuarray.array([0.0, 0.5, 1.0, 1.5, 2.0])
    >>> p1 = pygpu.gpuarray.array([1.0, 1.0, 1.0, 1.0, 1.0])
    >>> result = varlp_cc_integrand_gpuary(abs_f, p1)
    >>> np.allclose(result, [0, 0, 0, np.inf, np.inf])
    True

    With ``p = 2`` one gets ``abs_f ** 2 / 4``:

    >>> p2 = pygpu.gpuarray.array([2.0, 2.0, 2.0, 2.0, 2.0])
    >>> result = varlp_cc_integrand_gpuary(abs_f, p2)
    >>> np.allclose(result, np.asarray(abs_f) ** 2 / 4)
    True

    For other ``p`` values, the result is ``abs_f**(p/(p-1)) * r``,
    where ``r = p**(-1/(p-1)) - p**(-p/(p-1))``:

    >>> p15 = pygpu.gpuarray.array([1.5, 1.5, 1.5, 1.5, 1.5])
    >>> result = varlp_cc_integrand_gpuary(abs_f, p15)
    >>> p = np.asarray(p15)
    >>> r = p ** (-1 / (p - 1)) - p ** (-p / (p - 1))
    >>> np.allclose(result, np.asarray(abs_f) ** (p / (p - 1)) * r)
    True
    """
    ctx = pygpu.get_default_context()
    assert ctx is not None

    abs_f = pygpu.gpuarray.array(abs_f, copy=False)
    p = pygpu.gpuarray.array(p, copy=False)
    assert abs_f.dtype in (np.dtype('float32'), np.dtype('float64'))
    if out is None:
        out = abs_f._empty_like_me()
    args = [abs_f, p]
    argnames = ['abs_f', 'p']

    # OpenCL kernels are not implemented for float32 data
    if abs_f.dtype == np.dtype('float32') and ctx.kind == b'opencl':
        raise NotImplementedError("OpenCL kernels currently not supported "
                                  "for 'float32' data type")

    # Render the preamble source from templates
    pre_tpl = mako.template.Template(cc_integr_tpl_str)
    power = 'powf' if abs_f.dtype == np.dtype('float32') else 'pow'
    preamble = pre_tpl.render(dtype=DTYPE_TO_CTYPE[abs_f.dtype],
                              power=power)

    # Define the elementwise expression
    expr = 'out = varlp_cc_integrand(abs_f, p)'
    return elemwise(args, argnames, expr, preamble, out, 'out')
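
The three cases from the docstring admit a direct NumPy sketch (a CPU reference for the formulas only, not the kernel implementation):

import numpy as np

def cc_integrand(abs_f, p):
    # p == 1: indicator of the unit ball; p == 2: abs_f**2 / 4; otherwise
    # abs_f**(p/(p-1)) * r with r = p**(-1/(p-1)) - p**(-p/(p-1)).
    abs_f = np.asarray(abs_f, dtype=float)
    p = np.asarray(p, dtype=float)
    out = np.empty_like(abs_f)
    one, two = (p == 1), (p == 2)
    other = ~(one | two)
    out[one] = np.where(abs_f[one] <= 1, 0.0, np.inf)
    out[two] = abs_f[two] ** 2 / 4
    q = p[other]
    r = q ** (-1 / (q - 1)) - q ** (-q / (q - 1))
    out[other] = abs_f[other] ** (q / (q - 1)) * r
    return out

assert np.allclose(cc_integrand([0.0, 0.5, 1.0, 1.5], [1, 1, 2, 2]),
                   [0, 0, 0.25, 0.5625])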
Example #9
def varlp_cc_prox_factor_gpuary(abs_f, p, sigma, num_newton_iter, out=None):
    """Multiplicative factor for the variable Lp cc prox, GpuArray version.

    Parameters
    ----------
    abs_f : `array-like`
        Magnitude of the input function (scalar or vectorial) to the
        proximal.
    p : `array-like`
        Spatially varying exponent of the Lp modular. Must have same
        shape and dtype as ``abs_f``.
    sigma : positive float
        Step-size-like parameter of the proximal.
    num_newton_iter : positive int
        Number of Newton iterations to perform for the places where
        ``1 < p < 2``.
    out : `pygpu.gpuarray.GpuArray`, optional
        Array where the result should be stored. Its ``shape`` and
        ``dtype`` must match those of ``abs_f``.

    Returns
    -------
    out : `pygpu.gpuarray.GpuArray`
        Factor for the proximal operator of the convex conjugate.
        If ``out`` was provided, the returned object is a reference to it.

    Examples
    --------
    When ``abs_f == 0``, the returned value is always 0.
    Otherwise, exponent ``p = 1`` gives ``min(1, 1 / abs_f)``:

    >>> abs_f = np.array([0.0, 0.5, 1.0, 1.5, 2.0])
    >>> p1 = np.array([1.0, 1.0, 1.0, 1.0, 1.0])
    >>> sigma = 1.0
    >>> result = varlp_cc_prox_factor_gpuary(abs_f, p1, sigma,
    ...                                      num_newton_iter=1)
    >>> np.allclose(result, [0, 1, 1, 2.0 / 3.0, 0.5])
    True

    With ``p = 2`` one gets ``2 / (2 + sigma)``:

    >>> p2 = np.array([2.0, 2.0, 2.0, 2.0, 2.0])
    >>> sigma = 0.5
    >>> result = varlp_cc_prox_factor_gpuary(abs_f, p2, sigma,
    ...                                      num_newton_iter=1)
    >>> np.allclose(result, [0] + [0.8] * 4)
    True

    For other ``p`` values, the result is ``1 - v / abs_f``, where ``v``
    satisfies the equation ``v + sigma**(1-p) * p * v**(p-1) = abs_f``:

    >>> p15 = pygpu.gpuarray.array([1.5, 1.5, 1.5, 1.5, 1.5])
    >>> sigma = 1.0
    >>> result = varlp_cc_prox_factor_gpuary(abs_f, p15, sigma,
    ...                                      num_newton_iter=10)
    >>> v = (1 - np.asarray(result)) * abs_f
    >>> p = np.asarray(p15)
    >>> lhs = v + sigma ** (1 - p) * p * v ** (p - 1)
    >>> np.allclose(lhs, abs_f)
    True
    """
    ctx = pygpu.get_default_context()
    assert ctx is not None

    abs_f = pygpu.gpuarray.array(abs_f, copy=False)
    p = pygpu.gpuarray.array(p, copy=False)
    assert abs_f.dtype in (np.dtype('float32'), np.dtype('float64'))
    if out is None:
        out = abs_f._empty_like_me()
    sigma = float(sigma)
    num_newton_iter = int(num_newton_iter)
    args = [abs_f, p, sigma, num_newton_iter]
    argnames = ['abs_f', 'p', 'sigma', 'num_newton_iter']

    # OpenCL kernels are not implemented for float32 data
    if abs_f.dtype == np.dtype('float32') and ctx.kind == b'opencl':
        raise NotImplementedError("OpenCL kernels currently not supported "
                                  "for 'float32' data type")

    # Render the preamble source from templates
    pre_tpl = mako.template.Template(newton_tpl_str + cc_prox_tpl_str)
    power = 'powf' if abs_f.dtype == np.dtype('float32') else 'pow'
    minimum = 'fminf' if abs_f.dtype == np.dtype('float32') else 'fmin'
    maximum = 'fmaxf' if abs_f.dtype == np.dtype('float32') else 'fmax'
    preamble = pre_tpl.render(dtype=DTYPE_TO_CTYPE[abs_f.dtype],
                              maximum=maximum, minimum=minimum, power=power)

    # Define the elementwise expression
    expr = 'out = varlp_cc_prox_factor(abs_f, p, sigma, num_newton_iter)'
    return elemwise(args, argnames, expr, preamble, out, 'out')
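
For the 1 < p < 2 branch, the docstring's defining equation v + sigma**(1-p) * p * v**(p-1) = abs_f can be solved by a plain Newton iteration. A CPU sketch under assumed details (the starting value and clamping are illustrative, not the kernel's actual scheme):

import numpy as np

def cc_prox_factor_newton(abs_f, p, sigma, num_newton_iter):
    # Solve g(v) = v + c * v**(p-1) - abs_f = 0 with c = sigma**(1-p) * p,
    # then return 1 - v / abs_f (and 0 where abs_f == 0).
    abs_f = np.asarray(abs_f, dtype=float)
    c = sigma ** (1 - p) * p
    v = np.maximum(abs_f, 1e-8)  # positive start keeps v**(p-2) finite
    for _ in range(num_newton_iter):
        g = v + c * v ** (p - 1) - abs_f
        dg = 1 + c * (p - 1) * v ** (p - 2)
        v = np.maximum(v - g / dg, 1e-8)
    return np.where(abs_f > 0, 1 - v / abs_f, 0.0)

abs_f = np.array([0.5, 1.0, 2.0])
fac = cc_prox_factor_newton(abs_f, 1.5, 1.0, 20)
v = (1 - fac) * abs_f
assert np.allclose(v + 1.5 * np.sqrt(v), abs_f)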
Example #10
 def c_init_code(self):
     if pygpu.get_default_context().kind == 'opencl':
         raise MethodNotDefined('cuda only')
     return ['setup_ext_cuda();']
Example #11
 def c_headers(self):
     if pygpu.get_default_context().kind == 'opencl':
         raise MethodNotDefined('cuda only')
     return ['<stdint.h>', '<stdio.h>', 'cuda.h',
             '<gpuarray/extension.h>', '<numpy_compat.h>',
             '<gpuarray/ext_cuda.h>', '<gpuarray/types.h>']
Example #12
 def c_headers(self):
     if pygpu.get_default_context().kind == "opencl":
         raise MethodNotDefined("cuda only")
     return ["cuda.h", "<numpy_compat.h>", "<gpuarray_helper.h>", "<gpuarray/types.h>"]
Example #13
 def c_header_dirs(self):
     if pygpu.get_default_context().kind == 'opencl':
         raise MethodNotDefined('cuda only')
     cuda_root = config.cuda.root
     if cuda_root:
         return [os.path.join(cuda_root, 'include')]
     else:
         return []