Beispiel #1
0
def test_key_value_sorter(ctx_factory):
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    n = 10**5
    nkeys = 2000
    from pyopencl.clrandom import rand as clrand
    keys = clrand(queue, n, np.int32, b=nkeys)
    values = clrand(queue, n, np.int32, b=n).astype(np.int64)

    assert np.max(keys.get()) < nkeys

    from pyopencl.algorithm import KeyValueSorter
    kvs = KeyValueSorter(context)
    starts, lists, evt = kvs(queue, keys, values, nkeys, starts_dtype=np.int32)

    starts = starts.get()
    lists = lists.get()

    mydict = dict()
    for k, v in zip(keys.get(), values.get()):
        mydict.setdefault(k, []).append(v)

    for i in range(nkeys):
        start, end = starts[i:i+2]
        assert sorted(mydict[i]) == sorted(lists[start:end])
Beispiel #2
0
def test_comparisons(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    l = 20000
    a_dev = clrand(queue, (l,), dtype=np.float32)
    b_dev = clrand(queue, (l,), dtype=np.float32)

    a = a_dev.get()
    b = b_dev.get()

    import operator as o
    for op in [o.eq, o.ne, o.le, o.lt, o.ge, o.gt]:
        res_dev = op(a_dev, b_dev)
        res = op(a, b)

        assert (res_dev.get() == res).all()

        res_dev = op(a_dev, 0)
        res = op(a, 0)

        assert (res_dev.get() == res).all()

        res_dev = op(0, b_dev)
        res = op(0, b)

        assert (res_dev.get() == res).all()
Beispiel #3
0
def test_astype(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    if not has_double_support(context.devices[0]):
        from pytest import skip
        skip("double precision not supported on %s" % context.devices[0])

    a_gpu = clrand(queue, (2000,), dtype=np.float32)

    a = a_gpu.get().astype(np.float64)
    a2 = a_gpu.astype(np.float64).get()

    assert a2.dtype == np.float64
    assert la.norm(a - a2) == 0, (a, a2)

    a_gpu = clrand(queue, (2000,), dtype=np.float64)

    a = a_gpu.get().astype(np.float32)
    a2 = a_gpu.astype(np.float32).get()

    assert a2.dtype == np.float32
    assert la.norm(a - a2) / la.norm(a) < 1e-7
Beispiel #4
0
def general_clrand(queue, shape, dtype):
    from pyopencl.clrandom import rand as clrand

    dtype = np.dtype(dtype)
    if dtype.kind == "c":
        real_dtype = dtype.type(0).real.dtype
        return clrand(queue, shape, real_dtype) + 1j*clrand(queue, shape, real_dtype)
    else:
        return clrand(queue, shape, dtype)
Beispiel #5
0
    def test_dot(ctx_getter):
        from pyopencl.clrandom import rand as clrand
        a_gpu = clrand(context, queue, (200000,))
        a = a_gpu.get()
        b_gpu = clrand(context, queue, (200000,))
        b = b_gpu.get()

        dot_ab = numpy.dot(a, b)

        dot_ab_gpu = cl_array.dot(a_gpu, b_gpu).get()

        assert abs(dot_ab_gpu-dot_ab)/abs(dot_ab) < 1e-4
Beispiel #6
0
def test_dot(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand
    a_gpu = clrand(context, queue, (200000,), np.float32)
    a = a_gpu.get()
    b_gpu = clrand(context, queue, (200000,), np.float32)
    b = b_gpu.get()

    dot_ab = np.dot(a, b)

    dot_ab_gpu = cl_array.dot(a_gpu, b_gpu).get()

    assert abs(dot_ab_gpu-dot_ab)/abs(dot_ab) < 1e-4
Beispiel #7
0
def test_unique(ctx_factory):
    from pytest import importorskip

    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    for n in scan_test_counts:
        a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
        a = a_dev.get()
        a = np.sort(a)
        a_dev = cl_array.to_device(queue, a)

        a_unique_host = np.unique(a)

        from pyopencl.algorithm import unique

        a_unique_dev, count_unique_dev, evt = unique(a_dev)

        count_unique_dev = count_unique_dev.get()

        assert (a_unique_dev.get()[:count_unique_dev] == a_unique_host).all()
        from gc import collect

        collect()
Beispiel #8
0
def test_struct_reduce(ctx_factory):
    pytest.importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dev, = context.devices
    if (dev.vendor == "NVIDIA" and dev.platform.vendor == "Apple"
            and dev.driver_version == "8.12.47 310.40.00.05f01"):
        pytest.skip("causes a compiler hang on Apple/Nv GPU")

    mmc_dtype, mmc_c_decl = make_mmc_dtype(context.devices[0])

    preamble = mmc_c_decl + r"""//CL//

    minmax_collector mmc_neutral()
    {
        // FIXME: needs infinity literal in real use, ok here
        minmax_collector result;
        result.cur_min = 1<<30;
        result.cur_max = -(1<<30);
        return result;
    }

    minmax_collector mmc_from_scalar(float x)
    {
        minmax_collector result;
        result.cur_min = x;
        result.cur_max = x;
        return result;
    }

    minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
    {
        minmax_collector result = a;
        if (b.cur_min < result.cur_min)
            result.cur_min = b.cur_min;
        if (b.cur_max > result.cur_max)
            result.cur_max = b.cur_max;
        return result;
    }

    """

    from pyopencl.clrandom import rand as clrand
    a_gpu = clrand(queue, (20000,), dtype=np.int32, a=0, b=10**6)
    a = a_gpu.get()

    from pyopencl.reduction import ReductionKernel
    red = ReductionKernel(context, mmc_dtype,
            neutral="mmc_neutral()",
            reduce_expr="agg_mmc(a, b)", map_expr="mmc_from_scalar(x[i])",
            arguments="__global int *x", preamble=preamble)

    minmax = red(a_gpu).get()
    #print minmax["cur_min"], minmax["cur_max"]
    #print np.min(a), np.max(a)

    assert abs(minmax["cur_min"] - np.min(a)) < 1e-5
    assert abs(minmax["cur_max"] - np.max(a)) < 1e-5
Beispiel #9
0
    def test_subset_minmax(ctx_getter):
        context = ctx_getter()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        l_a = 200000
        gran = 5
        l_m = l_a - l_a // gran + 1

        if has_double_support():
            dtypes = [numpy.float64, numpy.float32, numpy.int32]
        else:
            dtypes = [numpy.float32, numpy.int32]

        for dtype in dtypes:
            a_gpu = clrand(context, queue, (l_a,), dtype)
            a = a_gpu.get()

            meaningful_indices_gpu = cl_array.zeros(l_m, dtype=numpy.int32)
            meaningful_indices = meaningful_indices_gpu.get()
            j = 0
            for i in range(len(meaningful_indices)):
                meaningful_indices[i] = j
                j = j + 1
                if j % gran == 0:
                    j = j + 1

            meaningful_indices_gpu = cl_array.to_device(meaningful_indices)
            b = a[meaningful_indices]

            min_a = numpy.min(b)
            min_a_gpu = cl_array.subset_min(meaningful_indices_gpu, a_gpu).get()

            assert min_a_gpu == min_a
Beispiel #10
0
def test_partition(ctx_factory):
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand
    for n in scan_test_counts:
        print("part", n)

        a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
        a = a_dev.get()

        crit = a_dev.dtype.type(300)
        true_host = a[a > crit]
        false_host = a[a <= crit]

        from pyopencl.algorithm import partition
        true_dev, false_dev, count_true_dev, evt = partition(
                a_dev, "ary[i] > myval", [("myval", crit)])

        count_true_dev = count_true_dev.get()

        assert (true_dev.get()[:count_true_dev] == true_host).all()
        assert (false_dev.get()[:n-count_true_dev] == false_host).all()
Beispiel #11
0
def test_elwise_kernel_with_options(ctx_factory):
    from pyopencl.clrandom import rand as clrand
    from pyopencl.elementwise import ElementwiseKernel

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    in_gpu = clrand(queue, (50,), np.float32)

    options = ['-D', 'ADD_ONE']
    add_one = ElementwiseKernel(
        context,
        "float* out, const float *in",
        """
        out[i] = in[i]
        #ifdef ADD_ONE
            +1
        #endif
        ;
        """,
        options=options,
        )

    out_gpu = cl_array.empty_like(in_gpu)
    add_one(out_gpu, in_gpu)

    gt = in_gpu.get() + 1
    gv = out_gpu.get()
    assert la.norm(gv - gt) < 1e-5
Beispiel #12
0
def test_concatenate(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    a_dev = clrand(queue, (5, 15, 20), dtype=np.float32)
    b_dev = clrand(queue, (4, 15, 20), dtype=np.float32)
    c_dev = clrand(queue, (3, 15, 20), dtype=np.float32)
    a = a_dev.get()
    b = b_dev.get()
    c = c_dev.get()

    cat_dev = cl.array.concatenate((a_dev, b_dev, c_dev))
    cat = np.concatenate((a, b, c))

    assert la.norm(cat - cat_dev.get()) == 0
Beispiel #13
0
def test_slice(ctx_factory):
    if _PYPY:
        pytest.xfail("numpypy: spurious as_strided failure")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    tp = np.float32

    ary_len = 20000
    a_gpu = clrand(queue, (ary_len,), dtype=tp)
    b_gpu = clrand(queue, (ary_len,), dtype=tp)
    a = a_gpu.get()
    b = b_gpu.get()

    from random import randrange
    for i in range(20):
        start = randrange(ary_len)
        end = randrange(start, ary_len)

        a_gpu_slice = tp(2)*a_gpu[start:end]
        a_slice = tp(2)*a[start:end]

        assert la.norm(a_gpu_slice.get() - a_slice) == 0

    for i in range(20):
        start = randrange(ary_len)
        end = randrange(start, ary_len)

        a_gpu[start:end] = tp(2)*b[start:end]
        a[start:end] = tp(2)*b[start:end]

        assert la.norm(a_gpu.get() - a) == 0

    for i in range(20):
        start = randrange(ary_len)
        end = randrange(start, ary_len)

        a_gpu[start:end] = tp(2)*b_gpu[start:end]
        a[start:end] = tp(2)*b[start:end]

        assert la.norm(a_gpu.get() - a) == 0
Beispiel #14
0
def test_elwise_kernel(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    a_gpu = clrand(context, queue, (50,), numpy.float32)
    b_gpu = clrand(context, queue, (50,), numpy.float32)

    from pyopencl.elementwise import ElementwiseKernel
    lin_comb = ElementwiseKernel(context,
            "float a, float *x, float b, float *y, float *z",
            "z[i] = a*x[i] + b*y[i]",
            "linear_combination")

    c_gpu = cl_array.empty_like(a_gpu)
    lin_comb(5, a_gpu, 6, b_gpu, c_gpu)

    assert la.norm((c_gpu - (5*a_gpu+6*b_gpu)).get()) < 1e-5
Beispiel #15
0
def test_slice(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    tp = np.float32

    l = 20000
    a_gpu = clrand(queue, (l,), dtype=tp)
    b_gpu = clrand(queue, (l,), dtype=tp)
    a = a_gpu.get()
    b = b_gpu.get()

    from random import randrange

    for i in range(20):
        start = randrange(l)
        end = randrange(start, l)

        a_gpu_slice = tp(2) * a_gpu[start:end]
        a_slice = tp(2) * a[start:end]

        assert la.norm(a_gpu_slice.get() - a_slice) == 0

    for i in range(20):
        start = randrange(l)
        end = randrange(start, l)

        a_gpu[start:end] = tp(2) * b[start:end]
        a[start:end] = tp(2) * b[start:end]

        assert la.norm(a_gpu.get() - a) == 0

    for i in range(20):
        start = randrange(l)
        end = randrange(start, l)

        a_gpu[start:end] = tp(2) * b_gpu[start:end]
        a[start:end] = tp(2) * b[start:end]

        assert la.norm(a_gpu.get() - a) == 0
Beispiel #16
0
def test_if_positive(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    ary_len = 20000
    a_gpu = clrand(queue, (ary_len,), np.float32)
    b_gpu = clrand(queue, (ary_len,), np.float32)
    a = a_gpu.get()
    b = b_gpu.get()

    max_a_b_gpu = cl_array.maximum(a_gpu, b_gpu)
    min_a_b_gpu = cl_array.minimum(a_gpu, b_gpu)

    print(max_a_b_gpu)
    print(np.maximum(a, b))

    assert la.norm(max_a_b_gpu.get() - np.maximum(a, b)) == 0
    assert la.norm(min_a_b_gpu.get() - np.minimum(a, b)) == 0
Beispiel #17
0
def test_if_positive(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    l = 20000
    a_gpu = clrand(context, queue, (l,), numpy.float32)
    b_gpu = clrand(context, queue, (l,), numpy.float32)
    a = a_gpu.get()
    b = b_gpu.get()

    max_a_b_gpu = cl_array.maximum(a_gpu, b_gpu)
    min_a_b_gpu = cl_array.minimum(a_gpu, b_gpu)

    print(max_a_b_gpu)
    print(numpy.maximum(a, b))

    assert la.norm(max_a_b_gpu.get()- numpy.maximum(a, b)) == 0
    assert la.norm(min_a_b_gpu.get()- numpy.minimum(a, b)) == 0
Beispiel #18
0
def test_struct_reduce(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    mmc_dtype, mmc_c_decl = make_mmc_dtype(context.devices[0])

    preamble = mmc_c_decl + r"""//CL//

    minmax_collector mmc_neutral()
    {
        // FIXME: needs infinity literal in real use, ok here
        minmax_collector result;
        result.cur_min = 1<<30;
        result.cur_max = -(1<<30);
        return result;
    }

    minmax_collector mmc_from_scalar(float x)
    {
        minmax_collector result;
        result.cur_min = x;
        result.cur_max = x;
        return result;
    }

    minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
    {
        minmax_collector result = a;
        if (b.cur_min < result.cur_min)
            result.cur_min = b.cur_min;
        if (b.cur_max > result.cur_max)
            result.cur_max = b.cur_max;
        return result;
    }

    """

    from pyopencl.clrandom import rand as clrand
    a_gpu = clrand(queue, (20000,), dtype=np.int32, a=0, b=10**6)
    a = a_gpu.get()

    from pyopencl.reduction import ReductionKernel
    red = ReductionKernel(context, mmc_dtype,
            neutral="mmc_neutral()",
            reduce_expr="agg_mmc(a, b)", map_expr="mmc_from_scalar(x[i])",
            arguments="__global int *x", preamble=preamble)

    minmax = red(a_gpu).get()
    #print minmax["cur_min"], minmax["cur_max"]
    #print np.min(a), np.max(a)

    assert abs(minmax["cur_min"] - np.min(a)) < 1e-5
    assert abs(minmax["cur_max"] - np.max(a)) < 1e-5
Beispiel #19
0
def test_diff(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    l = 20000
    a_dev = clrand(queue, (l,), dtype=np.float32)
    a = a_dev.get()

    err = la.norm((cl.array.diff(a_dev).get() - np.diff(a)))
    assert err < 1e-4
Beispiel #20
0
def test_sum(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    a_gpu = clrand(context, queue, (200000,), np.float32)
    a = a_gpu.get()

    sum_a = np.sum(a)
    sum_a_gpu = cl_array.sum(a_gpu).get()

    assert abs(sum_a_gpu-sum_a)/abs(sum_a) < 1e-4
Beispiel #21
0
    def test_sum(ctx_getter):
        context = ctx_getter()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        a_gpu = clrand(context, queue, (200000,))
        a = a_gpu.get()

        sum_a = numpy.sum(a)

        from pycuda.reduction import get_sum_kernel
        sum_a_gpu = cl_array.sum(a_gpu).get()

        assert abs(sum_a_gpu-sum_a)/abs(sum_a) < 1e-4
Beispiel #22
0
def test_transpose(ctx_factory):
    if _PYPY:
        pytest.xfail("numpypy: no array creation from __array_interface__")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    a_gpu = clrand(queue, (10, 20, 30), dtype=np.float32)
    a = a_gpu.get()

    # FIXME: not contiguous
    #assert np.allclose(a_gpu.transpose((1,2,0)).get(), a.transpose((1,2,0)))
    assert np.array_equal(a_gpu.T.get(), a.T)
Beispiel #23
0
def test_astype(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    if not has_double_support(context.devices[0]):
        return

    a_gpu = clrand(context, queue, (2000,), dtype=numpy.float32)

    a = a_gpu.get().astype(numpy.float64)
    a2 = a_gpu.astype(numpy.float64).get()

    assert a2.dtype == numpy.float64
    assert la.norm(a - a2) == 0, (a, a2)

    a_gpu = clrand(context, queue, (2000,), dtype=numpy.float64)

    a = a_gpu.get().astype(numpy.float32)
    a2 = a_gpu.astype(numpy.float32).get()

    assert a2.dtype == numpy.float32
    assert la.norm(a - a2)/la.norm(a) < 1e-7
Beispiel #24
0
def test_newaxis(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    a_gpu = clrand(queue, (10, 20, 30), dtype=np.float32)
    a = a_gpu.get()

    b_gpu = a_gpu[:, np.newaxis]
    b = a[:, np.newaxis]

    assert b_gpu.shape == b.shape
    for i in range(b.ndim):
        if b.shape[i] > 1:
            assert b_gpu.strides[i] == b.strides[i]
Beispiel #25
0
    def test_slice(ctx_getter):
        from pyopencl.clrandom import rand as clrand

        l = 20000
        a_gpu = clrand(context, queue, (l,))
        a = a_gpu.get()

        from random import randrange
        for i in range(200):
            start = randrange(l)
            end = randrange(start, l)

            a_gpu_slice = a_gpu[start:end]
            a_slice = a[start:end]

            assert la.norm(a_gpu_slice.get()-a_slice) == 0
Beispiel #26
0
def test_view_and_strides(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    X = clrand(queue, (5, 10), dtype=np.float32)
    Y = X[:3, :5]
    y = Y.view()

    assert y.shape == Y.shape
    assert y.strides == Y.strides

    import pytest
    with pytest.raises(AssertionError):
        assert (y.get() == X.get()[:3, :5]).all()
Beispiel #27
0
def test_random(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    if has_double_support(context.devices[0]):
        dtypes = [numpy.float32, numpy.float64]
    else:
        dtypes = [numpy.float32]

    for dtype in dtypes:
        a = clrand(context, queue, (10, 100), dtype=dtype).get()

        assert (0 <= a).all()
        assert (a < 1).all()
Beispiel #28
0
def test_copy_if(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand
    for n in scan_test_counts:
        a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
        a = a_dev.get()

        from pyopencl.algorithm import copy_if

        crit = a_dev.dtype.type(300)
        selected = a[a>crit]
        selected_dev, count_dev = copy_if(a_dev, "ary[i] > myval", [("myval", crit)])

        assert (selected_dev.get()[:count_dev.get()] == selected).all()
        from gc import collect
        collect()
Beispiel #29
0
def test_view_and_strides(ctx_factory):
    if _PYPY:
        pytest.xfail("numpypy: no array creation from __array_interface__")
    return

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    x = clrand(queue, (5, 10), dtype=np.float32)
    y = x[:3, :5]
    yv = y.view()

    assert yv.shape == y.shape
    assert yv.strides == y.strides

    with pytest.raises(AssertionError):
        assert (yv.get() == x.get()[:3, :5]).all()
Beispiel #30
0
def test_view_and_strides(ctx_factory):
    if _PYPY:
        pytest.xfail("numpypy: no array creation from __array_interface__")
    return

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    x = clrand(queue, (5, 10), dtype=np.float32)
    y = x[:3, :5]
    yv = y.view()

    assert yv.shape == y.shape
    assert yv.strides == y.strides

    with pytest.raises(AssertionError):
        assert (yv.get() == x.get()[:3, :5]).all()
Beispiel #31
0
def no_test_slice(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    l = 20000
    a_gpu = clrand(queue, (l,))
    a = a_gpu.get()

    from random import randrange
    for i in range(200):
        start = randrange(l)
        end = randrange(start, l)

        a_gpu_slice = a_gpu[start:end]
        a_slice = a[start:end]

        assert la.norm(a_gpu_slice.get() - a_slice) == 0
Beispiel #32
0
def test_partition(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand
    for n in scan_test_counts:
        a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
        a = a_dev.get()

        crit = a_dev.dtype.type(300)
        true_host = a[a>crit]
        false_host = a[a<=crit]

        from pyopencl.algorithm import partition
        true_dev, false_dev, count_true_dev = partition(a_dev, "ary[i] > myval", [("myval", crit)])

        count_true_dev = count_true_dev.get()

        assert (true_dev.get()[:count_true_dev] == true_host).all()
        assert (false_dev.get()[:n-count_true_dev] == false_host).all()
Beispiel #33
0
def test_minmax(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    if has_double_support(context.devices[0]):
        dtypes = [np.float64, np.float32, np.int32]
    else:
        dtypes = [np.float32, np.int32]

    for what in ["min", "max"]:
        for dtype in dtypes:
            a_gpu = clrand(queue, (200000,), dtype)
            a = a_gpu.get()

            op_a = getattr(np, what)(a)
            op_a_gpu = getattr(cl_array, what)(a_gpu).get()

            assert op_a_gpu == op_a, (op_a_gpu, op_a, dtype, what)
Beispiel #34
0
def test_unique(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand
    for n in scan_test_counts:
        a_dev = clrand(queue, (n,), dtype=np.int32, a=0, b=1000)
        a = a_dev.get()
        a = np.sort(a)
        a_dev = cl_array.to_device(queue, a)

        a_unique_host = np.unique(a)

        from pyopencl.algorithm import unique
        a_unique_dev, count_unique_dev = unique(a_dev)

        count_unique_dev = count_unique_dev.get()

        assert (a_unique_dev.get()[:count_unique_dev] == a_unique_host).all()
        from gc import collect
        collect()
Beispiel #35
0
def test_event_management(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    x = clrand(queue, (5, 10), dtype=np.float32)
    assert len(x.events) == 1, len(x.events)

    x.finish()

    assert len(x.events) == 0

    y = x + x
    assert len(y.events) == 1
    y = x * x
    assert len(y.events) == 1
    y = 2 * x
    assert len(y.events) == 1
    y = 2 / x
    assert len(y.events) == 1
    y = x / 2
    assert len(y.events) == 1
    y = x**2
    assert len(y.events) == 1
    y = 2**x
    assert len(y.events) == 1

    for i in range(10):
        x.fill(0)

    assert len(x.events) == 10

    for i in range(1000):
        x.fill(0)

    assert len(x.events) < 100
Beispiel #36
0
    def test_subset_minmax(ctx_getter):
        context = ctx_getter()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        l_a = 200000
        gran = 5
        l_m = l_a - l_a // gran + 1

        if has_double_support():
            dtypes = [numpy.float64, numpy.float32, numpy.int32]
        else:
            dtypes = [numpy.float32, numpy.int32]

        for dtype in dtypes:
            a_gpu = clrand(context, queue, (l_a, ), dtype)
            a = a_gpu.get()

            meaningful_indices_gpu = cl_array.zeros(l_m, dtype=numpy.int32)
            meaningful_indices = meaningful_indices_gpu.get()
            j = 0
            for i in range(len(meaningful_indices)):
                meaningful_indices[i] = j
                j = j + 1
                if j % gran == 0:
                    j = j + 1

            meaningful_indices_gpu = cl_array.to_device(meaningful_indices)
            b = a[meaningful_indices]

            min_a = numpy.min(b)
            min_a_gpu = cl_array.subset_min(meaningful_indices_gpu,
                                            a_gpu).get()

            assert min_a_gpu == min_a
Beispiel #37
0
def test_bitwise(ctx_factory):
    if _PYPY:
        pytest.xfail("numpypy: missing bitwise ops")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from itertools import product

    dtypes = [np.dtype(t) for t in (np.int64, np.int32, np.int16, np.int8)]

    from pyopencl.clrandom import rand as clrand

    for a_dtype, b_dtype in product(dtypes, dtypes):
        ary_len = 16

        np.random.seed(10)

        int32_min = np.iinfo(np.int32).min
        int32_max = np.iinfo(np.int32).max

        a_dev = clrand(queue, (ary_len, ),
                       a=int32_min,
                       b=1 + int32_max,
                       dtype=np.int64).astype(a_dtype)
        b_dev = clrand(queue, (ary_len, ),
                       a=int32_min,
                       b=1 + int32_max,
                       dtype=np.int64).astype(b_dtype)

        a = a_dev.get()
        b = b_dev.get()
        s = int(
            clrand(queue, (), a=int32_min, b=1 + int32_max,
                   dtype=np.int64).astype(b_dtype).get())

        import operator as o

        for op in [o.and_, o.or_, o.xor]:
            res_dev = op(a_dev, b_dev)
            res = op(a, b)

            assert (res_dev.get() == res).all()

            res_dev = op(a_dev, s)
            res = op(a, s)

            assert (res_dev.get() == res).all()

            res_dev = op(s, b_dev)
            res = op(s, b)

            assert (res_dev.get() == res).all()

        for op in [o.iand, o.ior, o.ixor]:
            res_dev = a_dev.copy()
            op_res = op(res_dev, b_dev)
            assert op_res is res_dev

            res = a.copy()
            op(res, b)

            assert (res_dev.get() == res).all()

            res_dev = a_dev.copy()
            op_res = op(res_dev, s)
            assert op_res is res_dev
            res = a.copy()
            op(res, s)

            assert (res_dev.get() == res).all()

        # Test unary ~
        res_dev = ~a_dev
        res = ~a  # pylint:disable=invalid-unary-operand-type
        assert (res_dev.get() == res).all()
Beispiel #38
0
    }

    minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
    {
        minmax_collector result = a;
        if (b.cur_min < result.cur_min)
            result.cur_min = b.cur_min;
        if (b.cur_max > result.cur_max)
            result.cur_max = b.cur_max;
        return result;
    }

    """

from pyopencl.clrandom import rand as clrand
a_gpu = clrand(queue, (20000, ), dtype=np.int32, a=0, b=10**6)
a = a_gpu.get()

from pyopencl.reduction import ReductionKernel
red = ReductionKernel(ctx,
                      mmc_dtype,
                      neutral="mmc_neutral()",
                      reduce_expr="agg_mmc(a, b)",
                      map_expr="mmc_from_scalar(x[i])",
                      arguments="__global int *x",
                      preamble=preamble)

minmax = red(a_gpu).get()

assert abs(minmax["cur_min"] - np.min(a)) < 1e-5
assert abs(minmax["cur_max"] - np.max(a)) < 1e-5