def test_compose():
    """Exercise ``compose`` on Range/Range and Range/Indices pairs.

    Every scenario builds an outer subset, an inner subset that is composed
    onto it, and the subset the composition is expected to compare equal to.
    """
    # Range onto range: 5:10 composed onto 10:20 is expected to give 15:20.
    outer = Range.from_string('0, 0:N, 10:20')
    inner = Range.from_string('0, 0:N, 5:10')
    expected = Range.from_string('0, 0:N, 15:20')
    assert expected == outer.compose(inner)

    # Outer range with two ranged dimensions, inner range with only one.
    outer = Range.from_string('0,0,0:M,0:N')
    inner = Range.from_string('0,0,0,0:N')
    expected = Range.from_string('0,0,0,0:N')
    assert expected == outer.compose(inner)

    # One outer range composed with both a Range and an Indices object.
    outer = Range.from_string('0, 0:N, 0:M, 50:100')
    inner_range = Range.from_string('0, 0, 0, 20:40')
    inner_point = Indices.from_string('0 , 0 , 0 , 0')
    expected_range = Range.from_string('0, 0, 0, 70:90')
    expected_point = Indices.from_string('0, 0, 0, 50')
    assert expected_range == outer.compose(inner_range)
    assert expected_point == outer.compose(inner_point)

    # Symbolic outer entries (i, j) combined with a symbolic inner index (k).
    outer = Range.from_string('i,j,0:N')
    inner = Indices.from_string('0,0,k')
    expected = Indices.from_string('i,j,k')
    assert expected == outer.compose(inner)
def test_squeeze_unsqueeze_indices():
    """Round-trip ``Indices`` objects through ``squeeze`` and ``unsqueeze``.

    Each scenario protects one dimension through ``ignore_indices``, checks
    which dimensions were reported as squeezed, and verifies that unsqueezing
    those dimensions yields an object equal to the untouched original.
    """
    # (index string, dimension passed via ignore_indices, expected squeezed dims)
    scenarios = (
        ('i, 0', 0, [1]),
        ('0, i', 1, [0]),
        ('i, 0, 0', 0, [1, 2]),
        ('0, i, 0', 1, [0, 2]),
        ('0, 0, i', 2, [0, 1]),
    )

    for text, protected_dim, expected_squeezed in scenarios:
        reference = Indices.from_string(text)
        # Work on a copy so the reference stays available for the final
        # comparison (squeeze/unsqueeze presumably modify the object in place).
        candidate = deepcopy(reference)

        kept_dims = candidate.squeeze(ignore_indices=[protected_dim])
        squeezed = [
            dim for dim in range(len(reference)) if dim not in kept_dims
        ]
        unsqueezed = candidate.unsqueeze(squeezed)

        assert (squeezed == unsqueezed)
        assert (expected_squeezed == squeezed)
        assert (reference == candidate)
def make_read_sdfg():
    """Build the ``filter_read`` SDFG.

    The state returned by ``make_iteration_space`` receives a global-memory
    array ``A_mem`` and a local stream ``_A_pipe``, joined by a single memlet
    path from the array to the stream.

    Returns:
        The constructed SDFG.
    """
    sdfg = SDFG("filter_read")
    state = make_iteration_space(sdfg)

    memory_node = state.add_array(
        "A_mem",
        [N],
        dtype=dtype,
        storage=StorageType.FPGA_Global,
    )
    pipe_node = state.add_stream(
        "_A_pipe",
        dtype=dtype,
        buffer_size=buffer_size,
        veclen=W.get(),
        storage=StorageType.FPGA_Local,
    )

    # The memlet is expressed on the stream side (subset "0"); the array side
    # is addressed through ``other_subset`` with index "i".
    read_memlet = Memlet(
        pipe_node,
        1,
        Indices(["0"]),
        W.get(),
        other_subset=Indices(["i"]),
    )
    state.add_memlet_path(memory_node, pipe_node, memlet=read_memlet)

    return sdfg
def make_sdfg(specialize):
    """Build the parallel FPGA histogram SDFG.

    The SDFG consists of three states chained by interstate edges: the state
    returned by ``make_copy_to_fpga_state``, a ``compute`` state, and the
    state returned by ``make_copy_to_host_state``. The compute state holds
    three parts: a nested compute SDFG placed under an unrolled map over
    ``p``, a read module feeding the ``A_pipes`` stream from ``A_device``,
    and a write module reducing ``hist_pipes`` into ``hist_device``.

    Args:
        specialize: When truthy, the values of ``H`` and ``W`` are appended to
            the SDFG name in addition to ``P``.

    Returns:
        The constructed SDFG.
    """
    if specialize:
        sdfg_name = "histogram_fpga_parallel_{}_{}x{}".format(
            P.get(), H.get(), W.get())
    else:
        sdfg_name = "histogram_fpga_parallel_{}".format(P.get())
    sdfg = SDFG(sdfg_name)

    pre_state = make_copy_to_fpga_state(sdfg)

    state = sdfg.add_state("compute")

    # Keyword arguments shared by every P-wide local stream in this state.
    local_stream_kwargs = dict(
        shape=(P, ),
        transient=True,
        storage=StorageType.FPGA_Local,
    )

    # ---- Compute module ----
    compute_sdfg = make_compute_nested_sdfg(state)
    compute_node = state.add_nested_sdfg(
        compute_sdfg, sdfg, {"A_pipe_in"}, {"hist_pipe_out"})

    # NOTE(review): "A_pipes" (and later "hist_pipes") is registered more than
    # once, apparently to obtain separate access nodes for the writing and
    # reading side of the same stream -- confirm that add_stream tolerates
    # re-registration of an existing name.
    pixels_write_node = state.add_stream("A_pipes", dtype,
                                         **local_stream_kwargs)
    pixels_read_node = state.add_stream("A_pipes", dtype,
                                        **local_stream_kwargs)
    hist_write_node = state.add_stream("hist_pipes", itype,
                                       **local_stream_kwargs)

    compute_entry, compute_exit = state.add_map(
        "unroll_compute",
        {"p": "0:P"},
        schedule=dace.ScheduleType.FPGA_Device,
        unroll=True,
    )

    # Empty memlets attach the stream access nodes to the unrolled map scope.
    state.add_memlet_path(compute_entry, pixels_read_node,
                          memlet=EmptyMemlet())
    state.add_memlet_path(hist_write_node, compute_exit,
                          memlet=EmptyMemlet())

    pixels_to_compute = Memlet.simple(
        pixels_read_node, "p", num_accesses="W*H")
    state.add_memlet_path(
        pixels_read_node,
        compute_node,
        dst_conn="A_pipe_in",
        memlet=pixels_to_compute,
    )

    compute_to_hist = Memlet.simple(
        hist_write_node, "p", num_accesses="num_bins")
    state.add_memlet_path(
        compute_node,
        hist_write_node,
        src_conn="hist_pipe_out",
        memlet=compute_to_hist,
    )

    # ---- Read module ----
    device_input = state.add_array(
        "A_device",
        (H, W),
        dtype,
        transient=True,
        storage=dace.dtypes.StorageType.FPGA_Global,
    )
    scan_entry, scan_exit = state.add_map(
        "read_map",
        {"h": "0:H", "w": "0:W:P"},
        schedule=ScheduleType.FPGA_Device,
    )
    vector_buffer = state.add_array(
        "A_val",
        (P, ),
        dtype,
        transient=True,
        storage=StorageType.FPGA_Local,
    )
    lane_entry, lane_exit = state.add_map(
        "read_unroll",
        {"p": "0:P"},
        schedule=ScheduleType.FPGA_Device,
        unroll=True,
    )
    lane_tasklet = state.add_tasklet(
        "read", {"A_in"}, {"A_pipe"}, "A_pipe = A_in[p]")

    # Vector read: the memlet is expressed on "A_val" (subset "0"), while the
    # "A_device" side is addressed at (h, w) through ``other_subset``.
    device_to_buffer = Memlet(
        vector_buffer,
        num_accesses=1,
        subset=Indices(["0"]),
        vector_length=P.get(),
        other_subset=Indices(["h", "w"]),
    )
    state.add_memlet_path(device_input, scan_entry, vector_buffer,
                          memlet=device_to_buffer)

    buffer_to_lane = Memlet.simple(
        vector_buffer, "0", veclen=P.get(), num_accesses=1)
    state.add_memlet_path(
        vector_buffer,
        lane_entry,
        lane_tasklet,
        dst_conn="A_in",
        memlet=buffer_to_lane,
    )

    lane_to_pipe = Memlet.simple(pixels_write_node, "p")
    state.add_memlet_path(
        lane_tasklet,
        lane_exit,
        scan_exit,
        pixels_write_node,
        src_conn="A_pipe",
        memlet=lane_to_pipe,
    )

    # ---- Write module ----
    hist_read_node = state.add_stream("hist_pipes", itype,
                                      **local_stream_kwargs)
    device_output = state.add_array(
        "hist_device",
        (num_bins, ),
        itype,
        transient=True,
        storage=dace.dtypes.StorageType.FPGA_Global,
    )
    bins_entry, bins_exit = state.add_map(
        "merge",
        {"nb": "0:num_bins"},
        schedule=ScheduleType.FPGA_Device,
    )
    # Sum reduction node; its input below is the "0:P" range of "hist_pipes".
    sum_node = state.add_reduce(
        "lambda a, b: a + b",
        (0, ),
        "0",
        schedule=ScheduleType.FPGA_Device,
    )

    pipes_to_sum = Memlet.simple(hist_read_node, "0:P", num_accesses=P)
    state.add_memlet_path(hist_read_node, bins_entry, sum_node,
                          memlet=pipes_to_sum)

    sum_to_device = dace.memlet.Memlet.simple(device_output, "nb")
    state.add_memlet_path(sum_node, bins_exit, device_output,
                          memlet=sum_to_device)

    post_state = make_copy_to_host_state(sdfg)

    # Chain the states: copy-to-FPGA -> compute -> copy-to-host.
    sdfg.add_edge(pre_state, state, dace.graph.edges.InterstateEdge())
    sdfg.add_edge(state, post_state, dace.graph.edges.InterstateEdge())

    return sdfg