예제 #1
0
 def _gen_basic(self, ls, nd):
     """Render and compile the basic reduction kernel.

     Parameters
     ----------
     ls
         Value handed to the template as ``local_size``.
     nd
         Number of dimensions. It is passed to the template and also
         controls how many per-dimension entries are added to the
         argument spec.

     Returns
     -------
     tuple
         ``(k, src, spec)``: the ``GpuKernel`` built from the rendered
         source, the rendered source itself, and the argument spec list
         that was given to the kernel.
     """
     src = basic_kernel.render(preamble=self.preamble,
                               reduce_expr=self.reduce_expr,
                               name="reduk",
                               out_arg=self.out_arg,
                               nd=nd,
                               arguments=self.arguments,
                               local_size=ls,
                               redux=self.redux,
                               neutral=self.neutral,
                               map_expr=self.expression)
     # Fixed leading entries, followed by one 'uint32' per dimension.
     spec = ['uint32', gpuarray.GpuArray]
     spec.extend('uint32' for _ in range(nd))
     for arg in self.arguments:
         spec.append(arg.spec())
         if arg.isarray():
             # Each array argument contributes one extra 'uint32' and then
             # one 'int32' per dimension.
             # NOTE(review): presumably an offset followed by per-dimension
             # strides -- confirm against the kernel template.
             spec.append('uint32')
             spec.extend('int32' for _ in range(nd))
     k = gpuarray.GpuKernel(src,
                            "reduk",
                            spec,
                            context=self.context,
                            cluda=True,
                            **self.flags)
     return k, src, spec
예제 #2
0
 def _make_basic(self, nd):
     """Build the basic elementwise kernel for ``nd`` dimensions.

     The kernel is named ``"elem_"`` followed by ``str(nd)``. Its source
     comes from ``self.render_basic`` and its argument spec from
     ``self.argspec_basic``.

     Parameters
     ----------
     nd
         Number of dimensions the kernel is generated for.

     Returns
     -------
     The ``gpuarray.GpuKernel`` built from the rendered source.
     """
     kernel_name = "elem_" + str(nd)
     # Render first, then compute the spec, matching the evaluation order
     # of the single-expression form.
     kernel_source = self.render_basic(nd, name=kernel_name)
     kernel_spec = self.argspec_basic(nd)
     return gpuarray.GpuKernel(
         kernel_source,
         kernel_name,
         kernel_spec,
         context=self.context,
         cluda=True,
         **self.flags
     )
예제 #3
0
    def __init__(self,
                 context,
                 arguments,
                 operation,
                 preamble="",
                 dimspec_limit=2,
                 spec_limit=10):
        """Record the kernel description and build the contiguous kernel.

        Parameters
        ----------
        context
            Stored as ``self.context`` and passed to ``GpuKernel``.
        arguments
            Either a string, which is parsed with ``parse_c_args``, or an
            iterable of argument objects, which is stored as a tuple.
        operation
            The operation to apply. It is stored unchanged as
            ``self.operation``; ``massage_op(operation)`` is stored as
            ``self.expression``.
        preamble
            Text passed to the kernel templates as ``preamble``.
        dimspec_limit
            Stored as ``self._dimspec_limit``; not otherwise used here.
        spec_limit
            Stored as ``self._spec_limit``; not otherwise used here.

        Raises
        ------
        RuntimeError
            If none of the arguments reports ``isarray()`` as true.
        """
        if isinstance(arguments, str):
            self.arguments = parse_c_args(arguments)
        else:
            self.arguments = tuple(arguments)

        self.operation = operation
        self.expression = massage_op(operation)
        self.context = context
        self._spec_limit = spec_limit
        self._dimspec_limit = dimspec_limit

        if not any(arg.isarray() for arg in self.arguments):
            raise RuntimeError("ElemwiseKernel can only be used with "
                               "functions that have at least one "
                               "vector argument.")

        # Work out which compile flags the argument dtypes require.
        have_small = False
        have_double = False
        have_complex = False
        for arg in self.arguments:
            # isinstance (rather than an exact type comparison) so that
            # subclasses of ArrayArg are also treated as array arguments.
            if arg.dtype.itemsize < 4 and isinstance(arg, ArrayArg):
                have_small = True
            if arg.dtype in [numpy.float64, numpy.complex128]:
                have_double = True
            if arg.dtype in [numpy.complex64, numpy.complex128]:
                have_complex = True

        self.flags = dict(have_small=have_small,
                          have_double=have_double,
                          have_complex=have_complex)
        self.preamble = preamble

        # NOTE(review): the contiguous kernel is rendered from the raw
        # ``self.operation`` and not from ``self.expression`` -- presumably
        # intentional because of how that template indexes; confirm.
        self.contig_src = contiguous_kernel.render(preamble=self.preamble,
                                                   name="elem_contig",
                                                   arguments=self.arguments,
                                                   expression=self.operation)
        self.contig_k = gpuarray.GpuKernel(self.contig_src,
                                           "elem_contig",
                                           self.argspec_contig(),
                                           context=self.context,
                                           cluda=True,
                                           **self.flags)
        # Both start out unset.
        self._speckey = None
        self._dims = None
예제 #4
0
 def _make_dimspec(self, n, nd, dims):
     """Build the ``"elemk"`` kernel from the ``dimspec_kernel`` template.

     Parameters
     ----------
     n
         Passed to the template as ``n``.
     nd
         Passed to the template as ``nd`` and to ``self.argspec_dimspec``.
     dims
         Passed to the template as ``dims``.

     Returns
     -------
     The ``gpuarray.GpuKernel`` built from the rendered source.
     """
     kernel_name = "elemk"
     # Gather the template variables first so the render call stays short.
     template_vars = dict(
         preamble=self.preamble,
         name=kernel_name,
         n=n,
         nd=nd,
         dims=dims,
         arguments=self.arguments,
         expression=self.expression,
     )
     kernel_source = dimspec_kernel.render(**template_vars)
     kernel_spec = self.argspec_dimspec(nd)
     return gpuarray.GpuKernel(
         kernel_source,
         kernel_name,
         kernel_spec,
         context=self.context,
         cluda=True,
         **self.flags
     )
예제 #5
0
 def _make_specialized(self, n, nd, dims, strs, offsets):
     """Build the ``"elemk"`` kernel from the ``specialized_kernel`` template.

     Parameters
     ----------
     n
         Passed to the template as ``n``.
     nd
         Passed to the template as ``nd``.
     dims
         Passed to the template under the keyword ``dim`` (singular).
     strs
         Passed to the template as ``strs``.
     offsets
         Passed to the template as ``offsets``.

     Returns
     -------
     The ``gpuarray.GpuKernel`` built from the rendered source, using the
     argument spec from ``self.argspec_specialized()``.
     """
     kernel_name = "elemk"
     template_vars = dict(
         preamble=self.preamble,
         name=kernel_name,
         n=n,
         nd=nd,
         # The template variable is ``dim`` even though the parameter is
         # ``dims``.
         dim=dims,
         strs=strs,
         arguments=self.arguments,
         expression=self.expression,
         offsets=offsets,
     )
     kernel_source = specialized_kernel.render(**template_vars)
     kernel_spec = self.argspec_specialized()
     return gpuarray.GpuKernel(
         kernel_source,
         kernel_name,
         kernel_spec,
         context=self.context,
         cluda=True,
         **self.flags
     )