Ejemplo n.º 1
0
    def reduce(self, arg, stream=0):
        """Collapse a one-dimensional array into a single element.

        :param arg: 1d array to reduce.  It is used directly when
                    ``devicearray.is_cuda_ndarray(arg)`` is true, otherwise it
                    is first copied with ``cuda.to_device``.
        :param stream: Stream to run on.  Any falsy value (including the
                       default ``0``) is replaced by a new ``cuda.stream()``.
        :return: ``arg[0]`` when ``arg`` holds exactly one element, otherwise
                 element 0 of a one-element host buffer filled from the
                 reduction output.
        :raises TypeError: if ``arg`` has no elements.
        """
        first_signature = list(self.functions.keys())[0]
        assert len(first_signature) == 2, "must be a binary ufunc"
        assert arg.ndim == 1, "must use 1d array"

        length = arg.shape[0]
        if length == 0:
            raise TypeError("Reduction on an empty array.")
        if length == 1:
            # A lone element is already the answer.
            return arg[0]

        # List handed to the reduction helper alongside the input.
        scratch = []

        # The work always runs on a stream; make one if none was supplied.
        if not stream:
            stream = cuda.stream()

        with stream.auto_synchronize():
            # Only inputs that are not already CUDA arrays get transferred.
            device_input = (
                arg
                if devicearray.is_cuda_ndarray(arg)
                else cuda.to_device(arg, stream)
            )
            reduced = self.__reduce(device_input, scratch, stream)
            # One-element host buffer that receives the result.
            host_result = np.array((1,), dtype=arg.dtype)
            reduced.copy_to_host(host_result, stream=stream)

        return host_result[0]
Ejemplo n.º 2
0
    def reduce(self, arg, stream=0):
        """Collapse a one-dimensional array into a single element.

        :param arg: 1d array to reduce.  It is used directly when
                    ``devicearray.is_cuda_ndarray(arg)`` is true, otherwise it
                    is first copied with ``cuda.to_device``.
        :param stream: Stream to run on.  Any falsy value (including the
                       default ``0``) is replaced by a new ``cuda.stream()``.
        :return: ``arg[0]`` when ``arg`` holds exactly one element, otherwise
                 element 0 of a one-element host buffer filled from the
                 reduction output.
        :raises TypeError: if ``arg`` has no elements.
        """
        first_signature = list(self.functions.keys())[0]
        assert len(first_signature) == 2, "must be a binary ufunc"
        assert arg.ndim == 1, "must use 1d array"

        length = arg.shape[0]
        if length == 0:
            raise TypeError("Reduction on an empty array.")
        if length == 1:
            # A lone element is already the answer.
            return arg[0]

        # List handed to the reduction helper alongside the input.
        scratch = []

        # The work always runs on a stream; make one if none was supplied.
        if not stream:
            stream = cuda.stream()

        with stream.auto_synchronize():
            # Only inputs that are not already CUDA arrays get transferred.
            device_input = (
                arg
                if devicearray.is_cuda_ndarray(arg)
                else cuda.to_device(arg, stream)
            )
            reduced = self.__reduce(device_input, scratch, stream)
            # One-element host buffer that receives the result.
            host_result = np.array((1,), dtype=arg.dtype)
            reduced.copy_to_host(host_result, stream=stream)

        return host_result[0]
Ejemplo n.º 3
0
    def argsort(self, keys, begin_bit=0, end_bit=None):
        """Sort ``keys`` via ``self.sort`` and return the index ordering.

        An index array is obtained from ``self.init_arg(keys.size)`` and
        passed to ``self.sort`` as ``vals``; that array is what gets returned.

        :type keys: numpy.ndarray
        :param keys: Keys to sort inplace
        :type begin_bit: int
        :param begin_bit: The first bit to sort
        :type end_bit: int
        :param end_bit: Optional. The last bit to sort
        :return: The index array itself when ``is_cuda_ndarray(keys)`` is
                 true; otherwise the result of copying it to the host on
                 ``self.stream``.
        """
        indices = self.init_arg(keys.size)
        self.sort(keys, vals=indices, begin_bit=begin_bit, end_bit=end_bit)

        # Keys that are already CUDA arrays get the index array back as is.
        if is_cuda_ndarray(keys):
            return indices
        return indices.copy_to_host(stream=self.stream)
Ejemplo n.º 4
0
 def is_device_array(self, obj):
     """Return the result of ``devicearray.is_cuda_ndarray(obj)``."""
     on_device = devicearray.is_cuda_ndarray(obj)
     return on_device
Ejemplo n.º 5
0
 def is_device_array(self, obj):
     """Return the result of ``devicearray.is_cuda_ndarray(obj)``."""
     on_device = devicearray.is_cuda_ndarray(obj)
     return on_device