Esempio n. 1
0
def test_dot(ctx_factory):
    """Compare ``cl_array.dot`` and ``cl_array.vdot`` with their NumPy versions.

    Every ordered pair of dtypes is tried on two 200000-element device
    vectors. Each device result has to agree with the host result to a
    relative error below 1e-4.

    :arg ctx_factory: callable invoked without arguments to obtain the
        context that is handed to ``cl.CommandQueue``.
    """
    from pytest import importorskip
    # Skip (instead of failing) when mako cannot be imported.
    importorskip("mako")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # Single precision is always exercised; the double-precision dtypes are
    # added only when has_double_support() accepts the context's first device.
    dtypes = [np.float32, np.complex64]
    if has_double_support(ctx.devices[0]):
        dtypes += [np.float64, np.complex128]

    shape = (200000,)

    def rel_err(actual, expected):
        # Error of ``actual`` measured relative to the host reference.
        return abs(actual - expected) / abs(expected)

    # Same visiting order as two nested loops over ``dtypes``.
    for a_dtype, b_dtype in [(x, y) for x in dtypes for y in dtypes]:
        print(a_dtype, b_dtype)

        a_gpu = general_clrand(queue, shape, a_dtype)
        a = a_gpu.get()
        b_gpu = general_clrand(queue, shape, b_dtype)
        b = b_gpu.get()

        expected_dot = np.dot(a, b)
        device_dot = cl_array.dot(a_gpu, b_gpu).get()
        assert rel_err(device_dot, expected_dot) < 1e-4

        expected_vdot = np.vdot(a, b)
        device_vdot = cl_array.vdot(a_gpu, b_gpu).get()
        assert rel_err(device_vdot, expected_vdot) < 1e-4
Esempio n. 2
0
def test_dot(ctx_factory):
    """Verify device ``dot``/``vdot`` results against ``np.dot``/``np.vdot``.

    For each combination of operand dtypes, two device vectors of 200000
    entries are created with ``general_clrand`` and copied to the host.
    The device-side products must match the NumPy ones within a relative
    tolerance of 1e-4.

    :arg ctx_factory: called with no arguments; its result is used as the
        context for the command queue.
    """
    from pytest import importorskip
    importorskip("mako")  # the test is skipped when mako is unavailable

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    tolerance = 1e-4
    vec_shape = (200000, )

    first_device = cl_ctx.devices[0]
    dtypes = [np.float32, np.complex64]
    if has_double_support(first_device):
        # Double-precision variants are only tried where supported.
        dtypes.append(np.float64)
        dtypes.append(np.complex128)

    for a_dtype in dtypes:
        for b_dtype in dtypes:
            print(a_dtype, b_dtype)

            a_gpu = general_clrand(queue, vec_shape, a_dtype)
            a = a_gpu.get()
            b_gpu = general_clrand(queue, vec_shape, b_dtype)
            b = b_gpu.get()

            # Plain dot product.
            host_dot = np.dot(a, b)
            gpu_dot = cl_array.dot(a_gpu, b_gpu).get()
            dot_err = abs(gpu_dot - host_dot) / abs(host_dot)
            assert dot_err < tolerance

            # vdot counterpart, checked the same way.
            host_vdot = np.vdot(a, b)
            gpu_vdot = cl_array.vdot(a_gpu, b_gpu).get()
            vdot_err = abs(gpu_vdot - host_vdot) / abs(host_vdot)
            assert vdot_err < tolerance
Esempio n. 3
0
def test_sum(ctx_factory):
    """Check ``cl_array.sum`` against ``np.sum`` on whole and sliced arrays.

    A 200000-element device array is created per dtype and summed over
    several slices, both by slicing the device array first and by passing
    the slice through the ``slice=`` keyword. Results must match NumPy to
    a relative error below 1e-4 (absolute error when the reference sum is
    zero).

    :arg ctx_factory: callable invoked without arguments to obtain the
        context used for the command queue.
    """
    from pytest import importorskip
    # Skip (instead of failing) when mako cannot be imported.
    importorskip("mako")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    n = 200000
    slices = (
        slice(None),
        slice(1000, 3000),
        slice(1000, -3000),
        slice(1000, None),
        slice(1000, None, 3),
        slice(1000, 1000),  # empty range
    )

    for dtype in (np.float32, np.complex64):
        a_gpu = general_clrand(queue, (n,), dtype)
        a = a_gpu.get()

        for slc in slices:
            expected = np.sum(a[slc])

            # A zero reference (e.g. the empty slice) cannot serve as a
            # divisor, so the error is then compared unscaled.
            ref_divisor = abs(expected) if expected else 1

            # Slicing the device array itself is only exercised for
            # slices without a step.
            if slc.step is None:
                via_view = cl_array.sum(a_gpu[slc]).get()
                assert abs(via_view - expected) / ref_divisor < 1e-4

            via_kwarg = cl_array.sum(a_gpu, slice=slc).get()
            assert abs(via_kwarg - expected) / ref_divisor < 1e-4
Esempio n. 4
0
def test_sum(ctx_factory):
    """Compare device-side sums over several slices with ``np.sum``.

    For float32 and complex64, a 200000-element array is produced by
    ``general_clrand`` and summed over a set of slices. Each device result
    must agree with the host result within a relative error of 1e-4.

    :arg ctx_factory: called with no arguments; its result is used as the
        context for the command queue.
    """
    from pytest import importorskip
    importorskip("mako")  # the test is skipped when mako is unavailable

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    n = 200000
    tolerance = 1e-4
    test_slices = (
        slice(None),
        slice(1000, 3000),
        slice(1000, -3000),
        slice(1000, None),
        slice(1000, None, 3),
    )

    for dtype in (np.float32, np.complex64):
        a_gpu = general_clrand(queue, (n,), dtype)
        a = a_gpu.get()

        for slc in test_slices:
            host_sum = np.sum(a[slc])

            # The sliced-view path is only taken for slices without a step;
            # the ``slice=`` keyword path below is taken for all of them.
            if slc.step is None:
                view_sum = cl_array.sum(a_gpu[slc]).get()
                view_err = abs(view_sum - host_sum) / abs(host_sum)
                assert view_err < tolerance

            kwarg_sum = cl_array.sum(a_gpu, slice=slc).get()
            kwarg_err = abs(kwarg_sum - host_sum) / abs(host_sum)
            assert kwarg_err < tolerance
Esempio n. 5
0
def test_dot(ctx_factory):
    """Compare ``cl_array.dot`` and ``cl_array.vdot`` with their NumPy versions.

    Every ordered pair of dtypes is tried on two 200000-element device
    vectors, and each device result must match the host result to a
    relative error below 1e-4. When ``np.vdot`` raises
    ``NotImplementedError`` on PyPy, the vdot check for that pair is
    skipped; on any other interpreter the error propagates.

    :arg ctx_factory: callable invoked without arguments to obtain the
        context that is handed to ``cl.CommandQueue``.
    """
    from pytest import importorskip
    # Skip (instead of failing) when mako cannot be imported.
    importorskip("mako")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    dev = ctx.devices[0]

    dtypes = [np.float32, np.complex64]
    if has_double_support(dev):
        # complex128 is left out when has_struct_arg_count_bug() reports
        # "apple" for this device.
        apple_bug = has_struct_arg_count_bug(dev) == "apple"
        dtypes += [np.float64] if apple_bug else [np.float64, np.complex128]

    shape = (200000,)

    # Same visiting order as two nested loops over ``dtypes``.
    for a_dtype, b_dtype in [(x, y) for x in dtypes for y in dtypes]:
        print(a_dtype, b_dtype)

        a_gpu = general_clrand(queue, shape, a_dtype)
        a = a_gpu.get()
        b_gpu = general_clrand(queue, shape, b_dtype)
        b = b_gpu.get()

        expected_dot = np.dot(a, b)
        device_dot = cl_array.dot(a_gpu, b_gpu).get()
        assert abs(device_dot - expected_dot) / abs(expected_dot) < 1e-4

        try:
            expected_vdot = np.vdot(a, b)
        except NotImplementedError:
            import sys
            # Only tolerate the missing vdot on PyPy.
            if "__pypy__" not in sys.builtin_module_names:
                raise
            print("PYPY: VDOT UNIMPLEMENTED")
            continue

        device_vdot = cl_array.vdot(a_gpu, b_gpu).get()

        rel_err = abs(device_vdot - expected_vdot) / abs(expected_vdot)
        assert rel_err < 1e-4, rel_err
Esempio n. 6
0
def test_dot(ctx_factory):
    """Verify device ``dot``/``vdot`` results against ``np.dot``/``np.vdot``.

    For each combination of operand dtypes, two device vectors of 200000
    entries are created with ``general_clrand`` and copied to the host.
    The device-side products must match the NumPy ones within a relative
    tolerance of 1e-4. A ``NotImplementedError`` from ``np.vdot`` is
    tolerated on PyPy only, in which case the vdot comparison for that
    dtype pair is skipped.

    :arg ctx_factory: called with no arguments; its result is used as the
        context for the command queue.
    """
    from pytest import importorskip
    importorskip("mako")  # the test is skipped when mako is unavailable

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    dev = cl_ctx.devices[0]
    tolerance = 1e-4
    vec_shape = (200000,)

    dtypes = [np.float32, np.complex64]
    if has_double_support(dev):
        dtypes.append(np.float64)
        # complex128 is added unless has_struct_arg_count_bug() returns
        # "apple" for the device.
        if not (has_struct_arg_count_bug(dev) == "apple"):
            dtypes.append(np.complex128)

    dtype_pairs = [(first, second) for first in dtypes for second in dtypes]

    for a_dtype, b_dtype in dtype_pairs:
        print(a_dtype, b_dtype)

        a_gpu = general_clrand(queue, vec_shape, a_dtype)
        a = a_gpu.get()
        b_gpu = general_clrand(queue, vec_shape, b_dtype)
        b = b_gpu.get()

        host_dot = np.dot(a, b)
        gpu_dot = cl_array.dot(a_gpu, b_gpu).get()
        dot_err = abs(gpu_dot - host_dot) / abs(host_dot)
        assert dot_err < tolerance

        try:
            host_vdot = np.vdot(a, b)
        except NotImplementedError:
            import sys
            running_on_pypy = "__pypy__" in sys.builtin_module_names
            if not running_on_pypy:
                raise
            print("PYPY: VDOT UNIMPLEMENTED")
            continue

        gpu_vdot = cl_array.vdot(a_gpu, b_gpu).get()

        rel_err = abs(gpu_vdot - host_vdot) / abs(host_vdot)
        assert rel_err < tolerance, rel_err
Esempio n. 7
0
def test_sum(ctx_factory):
    """Check ``cl_array.sum`` of a whole device array against ``np.sum``.

    For float32 and complex64, a 200000-element array is created with
    ``general_clrand``; the device sum must match the host sum to a
    relative error below 1e-4.

    :arg ctx_factory: callable invoked without arguments to obtain the
        context used for the command queue.
    """
    from pytest import importorskip
    # Skip (instead of failing) when mako cannot be imported.
    importorskip("mako")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    shape = (200000,)

    for dtype in (np.float32, np.complex64):
        a_gpu = general_clrand(queue, shape, dtype)
        host_data = a_gpu.get()

        expected = np.sum(host_data)
        actual = cl_array.sum(a_gpu).get()

        rel_err = abs(actual - expected) / abs(expected)
        assert rel_err < 1e-4
Esempio n. 8
0
def test_sum(ctx_factory):
    """Compare the device-side sum of an array with NumPy's result.

    A 200000-element array from ``general_clrand`` is summed on the device
    and on the host, once for float32 and once for complex64. The two
    results must agree within a relative tolerance of 1e-4.

    :arg ctx_factory: called with no arguments; its result is used as the
        context for the command queue.
    """
    from pytest import importorskip
    importorskip("mako")  # the test is skipped when mako is unavailable

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    n = 200000
    tolerance = 1e-4

    def check(dtype):
        # One host/device comparison for a single dtype.
        a_gpu = general_clrand(queue, (n, ), dtype)
        a = a_gpu.get()

        host_sum = np.sum(a)
        gpu_sum = cl_array.sum(a_gpu).get()

        assert abs(gpu_sum - host_sum) / abs(host_sum) < tolerance

    for dtype in [np.float32, np.complex64]:
        check(dtype)