def reduce(self, arg, stream=0):
    """Reduce the 1-d array *arg* to a scalar with the wrapped binary ufunc.

    :param arg: 1-d array to reduce; either a host ``numpy.ndarray`` or a
                CUDA device ndarray (checked via
                ``devicearray.is_cuda_ndarray``).
    :param stream: Optional CUDA stream to run the reduction on; a new
                   stream is created when falsy (the default ``0``).
    :return: The scalar reduction result (``arg.dtype``).
    :raises TypeError: If the wrapped function is not a binary ufunc, if
                       *arg* is not 1-d, or if *arg* is empty.
    """
    # Explicit raises instead of ``assert`` so validation survives
    # ``python -O`` (asserts are stripped under optimization).
    if len(list(self.functions.keys())[0]) != 2:
        raise TypeError("must be a binary ufunc")
    if arg.ndim != 1:
        raise TypeError("must use 1d array")

    n = arg.shape[0]
    gpu_mems = []

    if n == 0:
        raise TypeError("Reduction on an empty array.")
    elif n == 1:
        # Nothing to reduce — a single element is its own reduction.
        return arg[0]

    # Always use a stream so transfers and kernels can be asynchronous.
    stream = stream or cuda.stream()
    with stream.auto_synchronize():
        # Transfer memory to device if necessary.
        if devicearray.is_cuda_ndarray(arg):
            mem = arg
        else:
            mem = cuda.to_device(arg, stream)
        # Do the reduction on the device.
        out = self.__reduce(mem, gpu_mems, stream)
        # Small host buffer to receive the single result element.
        # ``np.empty`` (not ``np.array((1,), ...)``) — we only need an
        # uninitialized 1-element buffer; copy_to_host overwrites it.
        buf = np.empty(1, dtype=arg.dtype)
        out.copy_to_host(buf, stream=stream)

    # The ``with`` block has synchronized the stream, so buf is valid here.
    return buf[0]
def argsort(self, keys, begin_bit=0, end_bit=None):
    """Sort *keys* in place and return the permutation indices.

    Similar to ``RadixSort.sort`` but also returns the indices that
    describe the new ordering.

    :type keys: numpy.ndarray
    :param keys: Keys to sort inplace
    :type begin_bit: int
    :param begin_bit: The first bit to sort
    :type end_bit: int
    :param end_bit: Optional. The last bit to sort
    :return: The indices indicating the new ordering as an array on the
             CUDA device or on the host.
    """
    indices = self.init_arg(keys.size)
    self.sort(keys, vals=indices, begin_bit=begin_bit, end_bit=end_bit)
    # Mirror the location of the input: device arrays stay on the device,
    # host arrays get the indices copied back to the host.
    if is_cuda_ndarray(keys):
        return indices
    return indices.copy_to_host(stream=self.stream)
def is_device_array(self, obj):
    """Return whether *obj* is a CUDA device ndarray."""
    check = devicearray.is_cuda_ndarray
    return check(obj)