Ejemplo n.º 1
0
def _autodevice(ary, stream):
    """Yield a device-side handle for ``ary`` and copy results back afterwards.

    This is a single-``yield`` generator.  Code in this project consumes it
    through a ``with`` statement, so it is presumably wrapped by
    ``contextlib.contextmanager`` (the decorator is not visible here).

    :param ary: Host array to expose on the device, or ``None``.
    :param stream: Stream forwarded to ``auto_device`` and ``copy_to_host``.
    :returns: Yields the first item returned by ``auto_device``, or ``None``
              when ``ary`` is ``None``.
    """
    if ary is None:
        # Nothing to transfer: hand back a placeholder and finish.
        yield None
        return

    device_ary, needs_copy_back = auto_device(ary, stream=stream)
    yield device_ary
    # Write back into ``ary`` only when auto_device set its second result
    # (presumably meaning a fresh device copy was created -- TODO confirm).
    if needs_copy_back:
        device_ary.copy_to_host(ary, stream=stream)
Ejemplo n.º 2
0
    def test_auto_device(self):
        """Send a host record through ``auto_device`` and copy it back."""
        # Hand a private copy of ``self.hostnz`` to auto_device.
        source = self.hostnz.copy()
        device_record, created_new = auto_device(source)
        self._check_device_record(source, device_record)
        # The second result is expected to be truthy for a host input.
        self.assertTrue(created_new)

        # Copy the device data into a copy of ``self.hostz``; afterwards it
        # must compare equal to what was handed to auto_device.
        roundtrip = self.hostz.copy()
        device_record.copy_to_host(roundtrip)
        np.testing.assert_equal(roundtrip, source)
Ejemplo n.º 3
0
    def test_auto_device(self):
        """Check that ``auto_device`` round-trips a host record unchanged."""
        # Upload: auto_device receives a copy of ``self.hostnz``.
        uploaded = self.hostnz.copy()
        result = auto_device(uploaded)
        dev_rec, is_new = result
        self._check_device_record(uploaded, dev_rec)
        self.assertTrue(is_new)

        # Download: start from a copy of ``self.hostz`` and overwrite it with
        # the device contents, then compare against the uploaded record.
        downloaded = self.hostz.copy()
        dev_rec.copy_to_host(downloaded)
        np.testing.assert_equal(downloaded, uploaded)
Ejemplo n.º 4
0
def _autodevice(ary, stream, firstk=None):
    """Yield a device-side handle for ``ary`` and copy results back afterwards.

    This is a single-``yield`` generator; it is presumably wrapped by
    ``contextlib.contextmanager`` (the decorator is not visible here).

    :param ary: Host array to expose on the device, or ``None``.
    :param stream: Stream forwarded to ``auto_device``, ``bind`` and
                   ``copy_to_host``.
    :param firstk: Optional. When given, only the leading ``firstk`` items
                   are copied back into ``ary``; otherwise the whole array is.
    :returns: Yields the first item returned by ``auto_device``, or ``None``
              when ``ary`` is ``None``.
    """
    if ary is None:
        # Nothing to transfer: hand back a placeholder and finish.
        yield None
        return

    device_ary, needs_copy_back = auto_device(ary, stream=stream)
    yield device_ary

    # Copy back only when auto_device set its second result (presumably
    # meaning a fresh device copy was created -- TODO confirm).
    if not needs_copy_back:
        return

    if firstk is None:
        device_ary.copy_to_host(ary, stream=stream)
    else:
        # Partial copy-back: slice both sides to the same leading range.
        device_head = device_ary.bind(stream)[:firstk]
        host_head = ary[:firstk]
        device_head.copy_to_host(host_head, stream=stream)
Ejemplo n.º 5
0
def test_segsort_operation():
    """Crude check of the sort returned by ``_bind_segsort_double``.

    Random keys are sorted on the device in segments, and the result is
    compared against the same segments sorted on the host with NumPy.
    """
    total = 1000

    # Host-side inputs.  ``expected`` is sorted segment-wise further down and
    # ``unsorted`` preserves the initial order for the permutation check.
    keys = np.random.rand(total)
    expected = keys.copy()
    unsorted = keys.copy()
    values = np.arange(keys.size, dtype=np.int32)
    segments = np.arange(64, total, 64, dtype=np.int32)

    # Upload each array, zeroing the host buffers right after upload
    # (presumably so stale host data cannot satisfy the final comparison).
    d_keys, _ = auto_device(keys)
    keys[:] = 0
    d_values, _ = auto_device(values)
    values[:] = 0
    d_segments, _ = auto_device(segments)

    # Run the sort on the device buffers.
    sort = _bind_segsort_double()
    sort(device_pointer(d_keys), device_pointer(d_values), d_keys.size,
         device_pointer(d_segments), d_segments.size, 0)

    # Bring the results back to the host.
    d_keys.copy_to_host(keys)
    d_values.copy_to_host(values)

    # Build the reference: sort every [start, stop) segment in place.
    cuts = list(segments)
    for start, stop in zip([0] + cuts, cuts + [total]):
        expected[start:stop].sort()

    np.testing.assert_equal(keys, expected)
    # ``values`` started as 0..n-1, so it should now index the original keys
    # into sorted order.
    np.testing.assert_equal(unsorted[values], expected)
Ejemplo n.º 6
0
def segmented_sort(keys, vals, segments, stream=0):
    """Sort ``keys`` in place within each segment, reordering ``vals`` too.

    Intended for small segments (N < 1e6).

    :type keys: numpy.ndarray
    :param keys: Keys to sort inplace.
    :type vals: numpy.ndarray
    :param vals: Values reordered inplace to follow the keys. Only the
                 ``uint32`` dtype is supported in this implementation.
    :type segments: numpy.ndarray
    :param segments: Positions at which one segment ends and the next
                     begins. e.g. ``array([3, 6, 8])`` describes the segments
                     ``keys[:3]``, ``keys[3:6]``, ``keys[6:8]`` and
                     ``keys[8:]``.
    :param stream: Optional. A cuda stream in which the kernels are executed.
    """
    # Both managers are entered left to right and exited in reverse, exactly
    # like two nested ``with`` statements.
    with _autodevice(keys, stream) as dev_keys, \
            _autodevice(vals, stream) as dev_vals:
        # The segment array is only read, so it is never copied back.
        dev_segments, _converted = auto_device(segments, stream=stream)
        _segmentedsort(dev_keys, dev_vals, dev_segments, stream)
Ejemplo n.º 7
0
def test_radixsort_operation():
    """Crude check of the sort returned by ``_bind_radixsort_double``.

    Random keys are uploaded, sorted on the device, copied back and compared
    against ``np.sort`` of the same data.
    """
    dtype = np.float64
    maxcount = 1000

    keys = np.random.rand(maxcount)
    reference = np.copy(keys)

    # copy to device
    dptr, _ = auto_device(keys)

    # Fixed arguments for every call of the sort.
    itemsize = np.dtype(dtype).itemsize
    stream = 0
    descending = 0
    begin_bit = 0
    end_bit = itemsize * 8  # number of bits in one key

    def runsort(temp, d_keys):
        """Call the bound sort once and return whatever it returns.

        ``d_keys`` may be ``None``; ``maxcount`` is then used as the count.
        """
        # Test explicitly for None: relying on the truthiness of an array
        # object is ambiguous and is not what "was an array given" means.
        count = maxcount if d_keys is None else d_keys.size

        _sort = _bind_radixsort_double()

        # Scratch key buffer sized for ``maxcount`` items of ``dtype``.
        ctx = cuda.current_context()
        _temp_keys = ctx.memalloc(int(maxcount * itemsize))

        # The two ``None`` arguments follow the key buffers; no value arrays
        # are sorted in this test.
        return _sort(temp, ctypes.c_uint(count), device_pointer(d_keys),
                     device_pointer(_temp_keys), None, None, stream,
                     descending, begin_bit, end_bit)

    # First call without keys: the return value is passed back in as
    # ``temp`` below (presumably a temporary-storage handle).
    temp = runsort(None, None)

    # do the sort
    runsort(temp, dptr)

    # copy back
    dptr.copy_to_host(keys)

    # compare
    np.testing.assert_equal(np.sort(reference), keys)