def _autodevice(ary, stream): if ary is not None: dptr, conv = auto_device(ary, stream=stream) yield dptr if conv: dptr.copy_to_host(ary, stream=stream) else: yield None
def test_auto_device(self):
    """Round-trip a host record through ``auto_device`` and back.

    A copy of ``self.hostnz`` is sent to the device, the device record is
    checked against it with ``self._check_device_record``, and the flag
    returned by ``auto_device`` is expected to be truthy.  The device record
    is then copied into a fresh copy of ``self.hostz`` and must compare equal
    to the record that was passed to ``auto_device``.
    """
    # Build the device record from a private copy of the host record.
    source_rec = self.hostnz.copy()
    device_rec, created_new = auto_device(source_rec)
    self._check_device_record(source_rec, device_rec)
    self.assertTrue(created_new)

    # Read the device data back into a separate host record and compare it
    # with what was handed to auto_device.
    readback = self.hostz.copy()
    device_rec.copy_to_host(readback)
    np.testing.assert_equal(readback, source_rec)
def _autodevice(ary, stream, firstk=None): if ary is not None: dptr, conv = auto_device(ary, stream=stream) yield dptr if conv: if firstk is None: dptr.copy_to_host(ary, stream=stream) else: dptr.bind(stream)[:firstk].copy_to_host(ary[:firstk], stream=stream) else: yield None
def test_segsort_operation():
    """Crude end-to-end check of the double-precision segmented sort.

    Random keys and an index array of values are sent to the device, the
    bound segmented-sort routine is invoked on them, and the results are
    copied back.  The keys must equal a host-side reference in which each
    segment was sorted independently, and the returned values must index the
    original keys into that same order.
    """
    maxcount = 1000

    keys = np.random.rand(maxcount)
    expected = keys.copy()
    unsorted = keys.copy()
    values = np.arange(keys.size, dtype=np.int32)
    segments = np.arange(64, maxcount, 64, dtype=np.int32)

    # Upload, then zero the host buffers so the later copy-back is what
    # fills them.
    d_keys, _ = auto_device(keys)
    keys[:] = 0
    d_values, _ = auto_device(values)
    values[:] = 0
    d_segments, _ = auto_device(segments)

    # Run the sort on the device buffers.
    sort_fn = _bind_segsort_double()
    sort_fn(device_pointer(d_keys), device_pointer(d_values), d_keys.size,
            device_pointer(d_segments), d_segments.size, 0)

    # Copy the results back to the host.
    d_keys.copy_to_host(keys)
    d_values.copy_to_host(values)

    # Build the reference: sort each segment in place on the host.
    bounds = [0] + list(segments) + [maxcount]
    for start, stop in zip(bounds[:-1], bounds[1:]):
        expected[start:stop].sort()

    np.testing.assert_equal(keys, expected)
    np.testing.assert_equal(unsorted[values], expected)
def segmented_sort(keys, vals, segments, stream=0):
    """Sort small segments (N < 1e6) of ``keys`` in place.

    :type keys: numpy.ndarray
    :param keys: Keys, sorted in place segment by segment.
    :type vals: numpy.ndarray
    :param vals: Values, reordered in place to follow the keys.  This
                 implementation supports only the ``uint32`` dtype.
    :type segments: numpy.ndarray
    :param segments: Positions at which one segment ends and the next
                     begins.  For example ``array([3, 6, 8])`` describes the
                     segments ``keys[:3]``, ``keys[3:6]``, ``keys[6:8]`` and
                     ``keys[8:]``.
    :param stream: Optional.  CUDA stream in which the kernels are executed.
    """
    # Both managers are entered left to right and exited in reverse order,
    # exactly like two nested ``with`` statements.
    with _autodevice(keys, stream) as d_keys, \
            _autodevice(vals, stream) as d_vals:
        d_segments, _unused = auto_device(segments, stream=stream)
        _segmentedsort(d_keys, d_vals, d_segments, stream)
def test_radixsort_operation():
    """Crude end-to-end check of the double-precision radix sort.

    Random float64 keys are sent to the device and the bound radix-sort
    routine is called twice: first without keys to obtain a temporary-storage
    reference, then with that reference and the device keys to perform the
    sort.  The keys copied back must equal ``np.sort`` of the original data.
    """
    dtype = np.dtype(np.float64)
    maxcount = 1000

    keys = np.random.rand(maxcount)
    reference = np.copy(keys)

    # copy to device
    dptr, _ = auto_device(keys)

    def runsort(temp, d_keys):
        """Invoke the bound sort once and return whatever it returns.

        ``d_keys`` may be ``None`` (used for the temporary-storage query), in
        which case the element count falls back to ``maxcount``.
        """
        stream = 0
        descending = 0
        # Sort on every bit of the key.
        begin_bit = 0
        end_bit = dtype.itemsize * 8

        # Test for "no array supplied" explicitly; the truthiness of an
        # array-like object is not a reliable stand-in for that.
        count = maxcount if d_keys is None else d_keys.size

        ary_bytes = int(maxcount * dtype.itemsize)
        _sort = _bind_radixsort_double()
        ctx = cuda.current_context()
        # Scratch key buffer handed to the sort alongside the real keys.
        temp_keys = ctx.memalloc(ary_bytes)

        return _sort(temp,
                     ctypes.c_uint(count),
                     device_pointer(d_keys),
                     device_pointer(temp_keys),
                     None,
                     None,
                     stream,
                     descending,
                     begin_bit,
                     end_bit)

    # tmp storage ref
    temp = runsort(None, None)

    # do the sort
    runsort(temp, dptr)

    # copy back
    dptr.copy_to_host(keys)

    # compare
    np.testing.assert_equal(np.sort(reference), keys)