def test_default_stream_blas_node():
    """Matmul via the cuBLAS node with unlimited concurrent CUDA streams.

    Builds a small float32 GEMM program under ``max_concurrent_streams=-1``,
    lowers it to GPU, verifies the cuBLAS environment was actually used by
    some tasklet, and checks the numerical result against numpy.
    """
    A_desc = dace.float32[10, 5]
    B_desc = dace.float32[5, 3]
    C_desc = dace.float32[10, 3]

    with set_temporary("compiler", "cuda", "max_concurrent_streams", value=-1):
        with change_default(blas, "cuBLAS"):

            @dace.program
            def test_default_stream_blas_node(A: A_desc, B: B_desc, C: C_desc):
                C[:] = A @ B

            lhs = np.random.rand(*A_desc.shape).astype(np.float32)
            rhs = np.random.rand(*B_desc.shape).astype(np.float32)
            out = np.zeros(C_desc.shape).astype(np.float32)

            sdfg: dace.SDFG = test_default_stream_blas_node.to_sdfg()
            sdfg.apply_gpu_transformations()
            sdfg.expand_library_nodes()

            # Collect every environment declared by any tasklet in the
            # expanded SDFG; cuBLAS must appear among them.
            used_envs = set()
            for node, _ in sdfg.all_nodes_recursive():
                if isinstance(node, dace.nodes.Tasklet):
                    used_envs.update(node.environments)
            assert "cuBLAS" in used_envs

            sdfg(A=lhs, B=rhs, C=out)
            assert np.allclose(lhs @ rhs, out)
def test_batchmm():
    """Batched matrix multiply through cuBLAS with symbolic dimensions."""
    # Symbolic sizes for the program definition; avoid reusing the
    # symbol names as the comprehension variable.
    b, m, n, k = tuple(dace.symbol(name) for name in 'bmnk')
    with change_default(blas, "cuBLAS"):

        @dace.program
        def bmmtest(A: dace.float64[b, m, k], B: dace.float64[b, k, n], C: dace.float64[b, m, n]):
            C[:] = A @ B

        sdfg = bmmtest.to_sdfg()
        sdfg.apply_gpu_transformations()
        csdfg = sdfg.compile()

        # Rebind the symbol names to concrete sizes for execution.
        b, m, n, k = 3, 32, 31, 30
        x = np.random.rand(b, m, k)
        y = np.random.rand(b, k, n)
        z = np.zeros([b, m, n], np.float64)
        csdfg(A=x, B=y, C=z, b=b, m=m, n=n, k=k)

        ref = x @ y
        diff = np.linalg.norm(ref - z)
        print('Difference:', diff)
        assert diff < 1e-6
def test_layouts(dl):
    """Run the float32 cuBLAS matmul check for the given data layout ``dl``."""
    with change_default(blas, "cuBLAS"):
        _test_matmul(f'cuBLAS float {dl}', dace.float32, 'cuBLAS',
                     dace.StorageType.GPU_Global, data_layout=dl)
def test_change_default():
    """``change_default`` must restore the implementation that was set before it.

    Sets a sentinel default ("hello"), temporarily switches to MKL inside the
    context manager, and checks the sentinel comes back on exit.
    """
    old_default = blas.default_implementation
    blas.default_implementation = "hello"
    try:
        with change_default(blas, "MKL"):
            assert blas.default_implementation == "MKL"
        assert blas.default_implementation == "hello"
    finally:
        # Restore the library's real default even when an assertion fails,
        # so a failure here does not poison global state for later tests.
        blas.default_implementation = old_default
def test_gemm_fails_storage_mkl():
    """MKL GEMM on GPU-transformed data must fail with a storage access error."""
    with change_default(blas, "MKL"):
        with pytest.raises(ValueError) as info:

            @dace.program
            def test_failing_mkl(A: dace.float32[10, 5], B: dace.float32[5, 3], C: dace.float32[10, 3]):
                C[:] = A @ B

            sdfg = test_failing_mkl.to_sdfg()
            sdfg.apply_gpu_transformations()

            lhs = np.random.rand(10, 5).astype(np.float32)
            rhs = np.random.rand(5, 3).astype(np.float32)
            out = np.zeros((10, 3)).astype(np.float32)
            sdfg(A=lhs, B=rhs, C=out)

        # The CPU-only MKL expansion cannot reach GPU-resident arrays.
        assert "cannot access" in str(info.value)
def test_types():
    """Exercise the cuBLAS matmul expansion across supported element types."""
    with change_default(blas, "cuBLAS"):
        # (label, dtype, extra kwargs) — half precision gets a loose
        # tolerance, double/complex128 a tight one.
        cases = [
            ('cuBLAS double', dace.float64, {'eps': 1e-6}),
            ('cuBLAS half', dace.float16, {'eps': 1}),
            ('cuBLAS scmplx', dace.complex64, {}),
            ('cuBLAS dcmplx', dace.complex128, {'eps': 1e-6}),
        ]
        for label, dtype, extra in cases:
            _test_matmul(label, dtype, 'cuBLAS', dace.StorageType.GPU_Global, **extra)
def test_3x2(impl):
    """Batched 3D-by-2D einsum ("aik,kj->aij") through the given BLAS impl.

    Equivalent to ``A @ B`` with numpy broadcasting over the leading axis,
    which is what the final check uses as reference.
    """
    A_desc = dace.float32[8, 10, 12]
    B_desc = dace.float32[12, 5]
    C_desc = dace.float32[8, 10, 5]

    with change_default(blas, impl):

        @dace.program
        def test_3x2(A: A_desc, B: B_desc, C: C_desc):
            C[:] = np.einsum("aik,kj->aij", A, B)

        lhs = np.random.rand(*A_desc.shape).astype(np.float32)
        rhs = np.random.rand(*B_desc.shape).astype(np.float32)
        out = np.zeros(C_desc.shape).astype(np.float32)

        sdfg: dace.SDFG = test_3x2.to_sdfg()
        sdfg.name = f"{impl}_einsum_3x2"
        # Only the cuBLAS variant needs the data moved to the GPU.
        if impl == "cuBLAS":
            sdfg.apply_gpu_transformations()
        sdfg.expand_library_nodes()
        assert_used_environment(sdfg, impl)

        sdfg(A=lhs, B=rhs, C=out)
        assert np.allclose(lhs @ rhs, out)