コード例 #1
0
ファイル: matmul_test.py プロジェクト: am-ivanov/dace
def test_gemm_vectorized_decoupled():
    """Check the vectorized GEMM SDFG with decoupled Xilinx array interfaces.

    Creates random 128x128 float32 operands, builds the SDFG through
    ``create_gemm_sdfg`` with a vector width of 4, expands library nodes,
    inlines nested SDFGs, and runs it while
    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``.
    ``C`` after the call is compared against a NumPy reference.

    Returns:
        The SDFG that was executed.
    """
    shape = (128, 128)
    A = np.random.rand(*shape).astype(np.float32)
    B = np.random.rand(*shape).astype(np.float32)
    C = np.random.rand(*shape).astype(np.float32)
    alpha, beta = 2.1, 1.5
    vec_width = 4

    sdfg = create_gemm_sdfg("gemm_vectorized", alpha, beta, A, B, C,
                            dace.float32, vec_width=vec_width)
    sdfg.expand_library_nodes()
    sdfg.apply_transformations_repeated([InlineSDFG])

    # The reference must be taken before the SDFG runs, because the
    # comparison below reads C again after the call.
    reference = alpha * (A @ B) + beta * C

    with set_temporary("compiler", "xilinx", "decouple_array_interfaces",
                       value=True):
        sdfg(A=A, B=B, C=C)

    assert np.allclose(C, reference, atol=1e-6)
    return sdfg
コード例 #2
0
ファイル: matmul_test.py プロジェクト: am-ivanov/dace
def test_gemm_size_not_multiples_of_decoupled():
    """Check GEMM when sizes are not multiples of the PE count and tile size.

    Uses a 120x128 ``A``/``C`` and a 128x128 ``B`` together with the
    expansion arguments ``tile_size_m=50`` and ``num_pes=7``. The SDFG runs
    with ``compiler.xilinx.decouple_array_interfaces`` temporarily ``True``;
    the original author notes that decoupling memory reads/writes is needed
    to achieve II=1 with Xilinx.

    Returns:
        The SDFG that was executed.
    """
    rows, inner, cols = 120, 128, 128
    A = np.random.rand(rows, inner).astype(np.float32)
    B = np.random.rand(inner, cols).astype(np.float32)
    C = np.random.rand(rows, cols).astype(np.float32)

    expansion_args = {"tile_size_m": 50, "num_pes": 7}
    sdfg = create_gemm_sdfg("gemm_not_multiple_of", 1, 1, A, B, C,
                            dace.float32, expansion_args=expansion_args)
    sdfg.expand_library_nodes()
    sdfg.apply_transformations_repeated([InlineSDFG])

    # Reference result (alpha = beta = 1), captured before the SDFG runs.
    reference = A @ B + C

    with set_temporary("compiler", "xilinx", "decouple_array_interfaces",
                       value=True):
        sdfg(A=A, B=B, C=C)

    assert np.allclose(C, reference, atol=1e-6)
    return sdfg
コード例 #3
0
ファイル: vec_sum_test.py プロジェクト: am-ivanov/dace
def test_vec_sum_fpga_transform_first_decoupled_interfaces():
    """Run ``run_vec_sum(True)`` with decoupled Xilinx array interfaces.

    The original author notes that decoupled read/write interfaces are
    needed for this test to achieve II=1.

    Returns:
        Whatever ``run_vec_sum`` returns.
    """
    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True):
        result = run_vec_sum(True)
    return result
コード例 #4
0
def test_default_stream_blas_node():
    """Check that a matrix product expands to cuBLAS and computes ``A @ B``.

    With ``compiler.cuda.max_concurrent_streams`` temporarily set to ``-1``
    and the default ``blas`` implementation switched to ``"cuBLAS"``, a small
    DaCe program is converted to an SDFG, GPU-transformed, and its library
    nodes expanded. The test asserts that ``"cuBLAS"`` appears among the
    environments of the resulting tasklets and that the result matches NumPy.
    """
    A_desc = dace.float32[10, 5]
    B_desc = dace.float32[5, 3]
    C_desc = dace.float32[10, 3]

    with set_temporary("compiler", "cuda", "max_concurrent_streams", value=-1), \
            change_default(blas, "cuBLAS"):

        @dace.program
        def test_default_stream_blas_node(A: A_desc, B: B_desc, C: C_desc):
            C[:] = A @ B

        A = np.random.rand(*A_desc.shape).astype(np.float32)
        B = np.random.rand(*B_desc.shape).astype(np.float32)
        C = np.zeros(C_desc.shape).astype(np.float32)

        sdfg: dace.SDFG = test_default_stream_blas_node.to_sdfg()
        sdfg.apply_gpu_transformations()
        sdfg.expand_library_nodes()

        # Gather the environments of every tasklet, including nested ones.
        environments = set()
        for node, _ in sdfg.all_nodes_recursive():
            if isinstance(node, dace.nodes.Tasklet):
                environments.update(node.environments)

        assert "cuBLAS" in environments

        sdfg(A=A, B=B, C=C)
        assert np.allclose(A @ B, C)
コード例 #5
0
def test_inhibit_state_fusion():
    """Tests that state fusion is inhibited around callbacks if configured as such.

    The same program is converted to a simplified SDFG twice: with
    ``frontend.dont_fuse_callbacks`` enabled it must contain 5 nodes, and
    with it disabled it must contain 1.
    """
    @dace_inhibitor
    def add(a, b):
        return a + b

    @dace.program
    def calladd(A: dace.float64[20], B: dace.float64[20], C: dace.float64[20],
                D: dace.float64[20]):
        A[:] = add(B, C)
        D[:] = add(A, C)

    # (dont_fuse_callbacks value, expected SDFG node count)
    cases = ((True, 5), (False, 1))
    for dont_fuse, expected_nodes in cases:
        with config.set_temporary('frontend', 'dont_fuse_callbacks', value=dont_fuse):
            sdfg = calladd.to_sdfg(simplify=True)
            assert sdfg.number_of_nodes() == expected_nodes
コード例 #6
0
def test_fusion_with_transient_fpga_decoupled():
    """Check map fusion with a transient on FPGA with decoupled interfaces.

    Converts ``fusion_with_transient`` to an SDFG, simplifies it, applies
    ``MapFusion`` (at least two applications expected) and
    ``FPGATransformSDFG`` (exactly one application expected), then runs the
    SDFG with ``compiler.xilinx.decouple_array_interfaces`` temporarily
    ``True``. ``A`` after the call must equal ``A * A * 2`` computed from
    the input.

    Returns:
        The SDFG that was executed.
    """
    A = np.random.rand(2, 20)
    expected = A * A * 2
    sdfg = fusion_with_transient.to_sdfg()
    sdfg.simplify()

    # Apply the transformations outside of ``assert``: assert statements are
    # removed under ``python -O``, which would otherwise leave the SDFG
    # untransformed while still executing it below.
    num_fusions = sdfg.apply_transformations_repeated(MapFusion)
    assert num_fusions >= 2
    num_fpga_transforms = sdfg.apply_transformations_repeated(FPGATransformSDFG)
    assert num_fpga_transforms == 1

    with set_temporary("compiler",
                       "xilinx",
                       "decouple_array_interfaces",
                       value=True):
        sdfg(A=A)
    assert np.allclose(A, expected)
    return sdfg
コード例 #7
0
def test_map_unroll_processing_elements_decoupled():
    """Run the systolic GEMM sample with an unrolled-schedule PE map.

    Loads ``samples/fpga/gemm_systolic_vectorized.py`` (resolved relative to
    this file), builds its SDFG, specializes ``P``, ``W``, ``TN`` and ``TM``,
    and switches every map over parameter ``p`` from the ``unroll`` flag to
    the ``Unrolled`` schedule. The SDFG is executed with
    ``compiler.xilinx.decouple_array_interfaces`` temporarily ``True`` and
    its output compared against NumPy.

    Returns:
        The SDFG that was executed.

    Raises:
        ValueError: If the computed ``C`` does not match the reference.
    """
    # Grab the systolic GEMM implementation from the samples directory
    spec = importlib.util.spec_from_file_location(
        "gemm",
        Path(__file__).parent.parent.parent / "samples" / "fpga" /
        "gemm_systolic_vectorized.py")
    gemm = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(gemm)

    N = 128
    K = 256
    M = 512
    P = 8
    W = 4
    TN = 32
    TM = 128

    # Create an SDFG with multiple processing elements
    sdfg = gemm.make_sdfg("map_unroll_processing_elements",
                          dace.vector(dace.float32, W))
    sdfg.specialize({"P": P, "W": W, "TN": TN, "TM": TM})
    for state in sdfg.states():
        for node in state.nodes():
            if isinstance(node, nodes.MapEntry) and node.params == ["p"]:
                # Replace the unroll flag with the Unrolled schedule
                node.unroll = False
                node.schedule = dace.ScheduleType.Unrolled

    # Initialize arrays: A, B and C are all filled with random values
    A = np.ndarray([N, K], dtype=dace.float32.type)
    B = np.ndarray([K, M], dtype=dace.float32.type)
    C = np.ndarray([N, M], dtype=dace.float32.type)
    A[:] = np.random.rand(N, K).astype(dace.float32.type)
    B[:] = np.random.rand(K, M).astype(dace.float32.type)
    C[:] = np.random.rand(N, M).astype(dace.float32.type)

    # Reference result, captured before the SDFG runs
    C_regression = A @ B + C

    with set_temporary("compiler",
                       "xilinx",
                       "decouple_array_interfaces",
                       value=True):
        sdfg(A=A, B=B, C=C, N=N, M=M, K=K)
    if not np.allclose(C_regression, C):
        raise ValueError("Verification failed.")

    return sdfg
コード例 #8
0
def four_interface_to_2_banks(mem_type, decouple_interfaces):
    """Build and run an SDFG whose array ``a`` spans two memory banks.

    A 2x2 int32 array is placed on banks ``0:2`` of ``mem_type``. A tasklet
    inside an unrolled map reads ``a[0, 0]`` and ``a[1, 0]`` and writes their
    sum; after the FPGA transformations the tasklet's output subset is
    rewritten to ``1, 1``. The generated bank assignment is then checked and
    the SDFG executed.

    Args:
        mem_type: Memory type string stored in the array's ``memorytype``
            location and used as a prefix when counting bank assignments.
        decouple_interfaces: Value temporarily assigned to
            ``compiler.xilinx.decouple_array_interfaces`` while code is
            generated. Selects the expected interface counts (6/3/3 when
            truthy, 4/2/2 otherwise).

    Returns:
        The SDFG that was executed.
    """
    sdfg = SDFG("test_4_interface_to_2_banks_" + mem_type)
    state = sdfg.add_state()

    _, desc_a = sdfg.add_array("a", [2, 2], dace.int32)
    desc_a.location["memorytype"] = mem_type
    desc_a.location["bank"] = "0:2"
    acc_read1 = state.add_read("a")
    acc_write1 = state.add_write("a")

    t1 = state.add_tasklet("r1", {"_x1", "_x2"}, {"_y1"}, "_y1 = _x1 + _x2")

    m1_in, m1_out = state.add_map("m", {"k": "0:2"}, dtypes.ScheduleType.Unrolled)

    state.add_memlet_path(acc_read1, m1_in, t1, memlet=memlet.Memlet("a[0, 0]"), dst_conn="_x1")
    state.add_memlet_path(acc_read1, m1_in, t1, memlet=memlet.Memlet("a[1, 0]"), dst_conn="_x2")
    state.add_memlet_path(t1, m1_out, acc_write1, memlet=memlet.Memlet("a[0, 1]"), src_conn="_y1")

    sdfg.apply_fpga_transformations()
    # Run the transformations outside of ``assert`` so they are still applied
    # when assertions are disabled (``python -O``).
    num_inlined = sdfg.apply_transformations(InlineSDFG)
    assert num_inlined == 1
    num_unrolled = sdfg.apply_transformations(MapUnroll)
    assert num_unrolled == 1

    # Redirect the output of the first tasklet found to subset "1, 1"
    first_state = sdfg.states()[0]
    for node in first_state.nodes():
        if isinstance(node, dace.sdfg.nodes.Tasklet):
            first_state.out_edges(node)[0].data.subset = subsets.Range.from_string("1, 1")
            break

    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=decouple_interfaces):
        bank_assignment = sdfg.generate_code()[3].clean_code
        # If we are not decoupling array interfaces we will use fewer memory
        # interfaces. The conditional expression must be parenthesized:
        # ``x == 6 if flag else 4`` parses as ``(x == 6) if flag else 4``,
        # which makes the non-decoupled case assert a truthy constant.
        expected_total = 6 if decouple_interfaces else 4
        expected_per_bank = 3 if decouple_interfaces else 2
        assert bank_assignment.count("sp") == expected_total
        assert bank_assignment.count(mem_type + "[0]") == expected_per_bank
        assert bank_assignment.count(mem_type + "[1]") == expected_per_bank

    a = np.zeros([2, 2], np.int32)
    a[0, 0] = 2
    a[1, 0] = 3
    sdfg(a=a)
    assert a[0, 1] == 5

    return sdfg
コード例 #9
0
ファイル: config_test.py プロジェクト: mfkiwl/dace
def test_set_temporary():
    """Check that ``set_temporary`` overrides a value and restores it on exit.

    Uses the ``compiler.build_type`` entry: inside the context the entry
    must read back the temporary value, and afterwards the value that was
    present before entering.
    """
    key = ("compiler", "build_type")
    original = Config.get(*key)
    temporary = "I'm not a build type"

    with set_temporary(*key, value=temporary):
        assert Config.get(*key) == temporary

    assert Config.get(*key) == original
コード例 #10
0
def test_xilinx_decoupled_array_interfaces():
    """Run ``run_atax`` for the FPGA device type with decoupled array interfaces.

    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``
    for the duration of the call.

    Returns:
        Whatever ``run_atax`` returns.
    """
    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True):
        result = run_atax(dace.dtypes.DeviceType.FPGA)
    return result
コード例 #11
0
def test_hbm_reduce_2x3_2b_decouple_array_interfaces():
    """Run ``exec_test(2, 3, 2, "hbm", ...)`` with decoupled array interfaces.

    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``
    for the duration of the call.

    Returns:
        Whatever ``exec_test`` returns.
    """
    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True):
        result = exec_test(2, 3, 2, "hbm", "red_2x3_2b_decoupled")
    return result
コード例 #12
0
def test_ddr_reduce_red_2x40_6b_decouple_array_interfaces():
    """Run ``exec_test(2, 40, 6, "ddr", ...)`` with decoupled array interfaces.

    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``
    for the duration of the call.

    Returns:
        Whatever ``exec_test`` returns.
    """
    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True):
        result = exec_test(2, 40, 6, "ddr", "red_2x40_6b_decoupled")
    return result
コード例 #13
0
def test_ddr_reduce_red_1x50_1b_decouple_array_interfaces():
    """Run ``exec_test(1, 50, 1, "ddr", ...)`` with decoupled array interfaces.

    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``
    for the duration of the call.

    Returns:
        Whatever ``exec_test`` returns.
    """
    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True):
        result = exec_test(1, 50, 1, "ddr", "red_1x50_1b_decoupled")
    return result
コード例 #14
0
def test_hbm_reduce_red_1x40_8b_decouple_array_interfaces():
    """Run ``exec_test(1, 40, 8, "hbm", ...)`` with decoupled array interfaces.

    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``
    for the duration of the call.

    Returns:
        Whatever ``exec_test`` returns.
    """
    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True):
        result = exec_test(1, 40, 8, "hbm", "red_1x40_8b_decoupled")
    return result
コード例 #15
0
def test_hbm_reduce_10x50_4b_decouple_array_interfaces():
    """Run ``exec_test(10, 50, 4, "hbm", ...)`` with decoupled array interfaces.

    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``
    for the duration of the call.

    Returns:
        Whatever ``exec_test`` returns.
    """
    with set_temporary("compiler", "xilinx", "decouple_array_interfaces", value=True):
        result = exec_test(10, 50, 4, "hbm", "red_10x50_4b_decoupled")
    return result
コード例 #16
0
ファイル: dot_test.py プロジェクト: am-ivanov/dace
def test_dot_xilinx_decoupled():
    """Run ``run_test("xilinx", 64, 16)`` with decoupled array interfaces.

    ``compiler.xilinx.decouple_array_interfaces`` is temporarily ``True``
    for the duration of the call.

    Returns:
        Whatever ``run_test`` returns.
    """
    with set_temporary("compiler",
                       "xilinx",
                       "decouple_array_interfaces",
                       value=True):
        result = run_test("xilinx", 64, 16)
    return result