Beispiel #1
0
def test_default_stream_blas_node():
    """Matmul with ``max_concurrent_streams`` set to -1 must expand to cuBLAS.

    The program is converted to an SDFG, GPU-transformed and its library
    nodes expanded. The test then checks that some tasklet lists the
    "cuBLAS" environment and that the SDFG result matches NumPy's matmul.
    """
    lhs_desc = dace.float32[10, 5]
    rhs_desc = dace.float32[5, 3]
    out_desc = dace.float32[10, 3]

    with set_temporary("compiler", "cuda", "max_concurrent_streams", value=-1), \
            change_default(blas, "cuBLAS"):

        @dace.program
        def test_default_stream_blas_node(A: lhs_desc, B: rhs_desc, C: out_desc):
            C[:] = A @ B

        lhs = np.random.rand(*lhs_desc.shape).astype(np.float32)
        rhs = np.random.rand(*rhs_desc.shape).astype(np.float32)
        out = np.zeros(out_desc.shape).astype(np.float32)

        sdfg: dace.SDFG = test_default_stream_blas_node.to_sdfg()
        sdfg.apply_gpu_transformations()
        sdfg.expand_library_nodes()

        # Gather the environments of every tasklet, including nested ones.
        environments = set()
        for node, _ in sdfg.all_nodes_recursive():
            if isinstance(node, dace.nodes.Tasklet):
                environments.update(node.environments)

        assert "cuBLAS" in environments

        sdfg(A=lhs, B=rhs, C=out)
        assert np.allclose(lhs @ rhs, out)
Beispiel #2
0
def test_batchmm():
    """Batched float64 matmul with symbolic sizes, compiled with cuBLAS as default.

    The program is declared over symbols ``b``, ``m``, ``n``, ``k``, compiled
    once, then called with concrete sizes. The result must be within 1e-6
    (norm of the difference) of NumPy's batched matmul.
    """
    b, m, n, k = [dace.symbol(name) for name in 'bmnk']

    with change_default(blas, "cuBLAS"):

        @dace.program
        def bmmtest(A: dace.float64[b, m, k], B: dace.float64[b, k, n],
                    C: dace.float64[b, m, n]):
            C[:] = A @ B

        sdfg = bmmtest.to_sdfg()
        sdfg.apply_gpu_transformations()
        csdfg = sdfg.compile()

        # Concrete sizes bound to the symbols at call time.
        batch, rows, cols, inner = 3, 32, 31, 30

        x = np.random.rand(batch, rows, inner)
        y = np.random.rand(batch, inner, cols)
        z = np.zeros([batch, rows, cols], np.float64)
        csdfg(A=x, B=y, C=z, b=batch, m=rows, n=cols, k=inner)

    expected = np.matmul(x, y)
    diff = np.linalg.norm(expected - z)
    print('Difference:', diff)
    assert diff < 1e-6
Beispiel #3
0
def test_layouts(dl):
    """Run the shared matmul check for float32 on cuBLAS with a given data layout.

    :param dl: Data layout identifier; appended to the test label and
               forwarded to ``_test_matmul`` as ``data_layout``.
    """
    with change_default(blas, "cuBLAS"):
        label = 'cuBLAS float ' + dl
        _test_matmul(label, dace.float32, 'cuBLAS', dace.StorageType.GPU_Global, data_layout=dl)
Beispiel #4
0
def test_change_default():
    """Check that ``change_default`` swaps the default and restores it on exit.

    The default implementation of ``blas`` is temporarily set to a sentinel
    value ("hello"). Inside the context manager it must read "MKL", and after
    leaving the context it must read the sentinel again.
    """
    old_default = blas.default_implementation
    try:
        blas.default_implementation = "hello"

        with change_default(blas, "MKL"):
            assert blas.default_implementation == "MKL"
        assert blas.default_implementation == "hello"
    finally:
        # Restore the real default even if an assertion fails, so the
        # sentinel value cannot leak into tests that run afterwards.
        blas.default_implementation = old_default
Beispiel #5
0
def test_gemm_fails_storage_mkl():
    """A GPU-transformed matmul with MKL as default must fail with a ValueError.

    The program is built, GPU-transformed and invoked inside
    ``pytest.raises``; the error message must contain "cannot access".
    """
    with change_default(blas, "MKL"):
        with pytest.raises(ValueError) as excinfo:

            @dace.program
            def test_failing_mkl(A: dace.float32[10, 5], B: dace.float32[5, 3], C: dace.float32[10, 3]):
                C[:] = A @ B

            sdfg = test_failing_mkl.to_sdfg()
            sdfg.apply_gpu_transformations()

            lhs = np.random.rand(10, 5).astype(np.float32)
            rhs = np.random.rand(5, 3).astype(np.float32)
            out = np.zeros((10, 3), dtype=np.float32)
            sdfg(A=lhs, B=rhs, C=out)

        message = str(excinfo.value)
        assert "cannot access" in message
Beispiel #6
0
def test_types():
    """Run the shared matmul check on cuBLAS for several element types.

    Each case is (label, dtype, extra keyword arguments). The complex64 case
    passes no ``eps`` and therefore uses ``_test_matmul``'s own default.
    """
    with change_default(blas, "cuBLAS"):
        # Try different data types
        cases = (
            ('cuBLAS double', dace.float64, {'eps': 1e-6}),
            ('cuBLAS half', dace.float16, {'eps': 1}),
            ('cuBLAS scmplx', dace.complex64, {}),
            ('cuBLAS dcmplx', dace.complex128, {'eps': 1e-6}),
        )
        for label, dtype, extra in cases:
            _test_matmul(label, dtype, 'cuBLAS', dace.StorageType.GPU_Global, **extra)
Beispiel #7
0
def test_3x2(impl):
    """Einsum of a 3-D tensor with a 2-D matrix using ``impl`` as the BLAS default.

    :param impl: Name of the BLAS implementation to make the default; when it
                 equals "cuBLAS" the SDFG is GPU-transformed before expansion.
    """
    lhs_desc = dace.float32[8, 10, 12]
    rhs_desc = dace.float32[12, 5]
    out_desc = dace.float32[8, 10, 5]

    with change_default(blas, impl):

        @dace.program
        def test_3x2(A: lhs_desc, B: rhs_desc, C: out_desc):
            C[:] = np.einsum("aik,kj->aij", A, B)

        lhs = np.random.rand(*lhs_desc.shape).astype(np.float32)
        rhs = np.random.rand(*rhs_desc.shape).astype(np.float32)
        out = np.zeros(out_desc.shape).astype(np.float32)

        sdfg: dace.SDFG = test_3x2.to_sdfg()
        # Give each implementation its own SDFG name.
        sdfg.name = impl + "_einsum_3x2"
        if impl == "cuBLAS":
            sdfg.apply_gpu_transformations()
        sdfg.expand_library_nodes()

        assert_used_environment(sdfg, impl)

        sdfg(A=lhs, B=rhs, C=out)
        # NumPy matmul broadcasts the 2-D right operand over the leading axis.
        expected = np.matmul(lhs, rhs)
        assert np.allclose(expected, out)