def make_sdfg_1d(implementation: str, vector_length: int):
    """Build an SDFG holding a single 1D ``Stencil`` library node.

    The SDFG has an input array ``a`` and an output array ``res``, both of
    shape ``(SIZE / vector_length,)``. For ``vector_length > 1`` the element
    type is a ``dace.vector`` of ``DTYPE``; otherwise it is ``DTYPE`` itself.

    :param implementation: Value assigned to the stencil node's
                           ``implementation`` attribute.
    :param vector_length: Vector width used for the element type and for
                          scaling the array shape.
    :return: The constructed SDFG.
    """
    if vector_length > 1:
        element_type = dace.vector(dace.typeclass(DTYPE), vector_length)
    else:
        element_type = DTYPE

    sdfg = dace.SDFG(f"stencil_node_test_1d_w{vector_length}")
    _, in_desc = sdfg.add_array("a", (SIZE / vector_length, ), dtype=element_type)
    _, out_desc = sdfg.add_array("res", (SIZE / vector_length, ), dtype=element_type)

    state = sdfg.add_state("stencil_node_test_1d")
    in_node = state.add_read("a")
    out_node = state.add_write("res")

    # Stencil body: sums three consecutive elements of `a`, scales the sum
    # and writes it to res[1].
    stencil_code = """\
tmp0 = (a[0] + a[1])
tmp1 = (tmp0 + a[2])
res[1] = (dace.float32(0.3333) * tmp1)"""
    stencil_node = Stencil("stencil_test", stencil_code, inputs={"a"}, outputs={"res"})
    stencil_node.implementation = implementation
    state.add_node(stencil_node)

    # Both memlets cover the entire array of their respective container.
    in_memlet = dace.Memlet.from_array("a", in_desc)
    state.add_edge(in_node, None, stencil_node, "a", in_memlet)
    out_memlet = dace.Memlet.from_array("res", out_desc)
    state.add_edge(stencil_node, "res", out_node, None, out_memlet)

    return sdfg
def __descriptor__(self) -> Array:
    """Return a transient DaCe ``Array`` descriptor for this object.

    The element type is derived from ``self.dtype.type``, the shape is taken
    from ``self.shape``, and the strides are ``self.strides`` divided by
    ``self.itemsize``.
    """
    # Adapted from dace.data.create_datadescriptor; the only difference is
    # that the resulting descriptor is marked as transient.
    element_type = dace.typeclass(self.dtype.type)
    element_strides = tuple(stride // self.itemsize for stride in self.strides)
    return Array(
        dtype=element_type,
        strides=element_strides,
        shape=self.shape,
        transient=True,
    )
def make_sdfg_2d(implementation: str, vector_length: int):
    """Build an SDFG holding a single 2D ``Stencil`` library node.

    Containers registered on the SDFG:

    * ``a``, ``d`` and ``res`` with shape ``(ROWS, COLS / vector_length)``
      and the (possibly vectorized) element type,
    * ``b`` with shape ``(ROWS,)`` and element type ``DTYPE``,
    * the symbol ``c`` of type ``DTYPE``.

    :param implementation: Value assigned to the stencil node's
                           ``implementation`` attribute.
    :param vector_length: Vector width; values above 1 select a
                          ``dace.vector`` element type.
    :return: The constructed SDFG.
    """
    if vector_length > 1:
        element_type = dace.vector(dace.typeclass(DTYPE), vector_length)
    else:
        element_type = DTYPE

    sdfg = dace.SDFG(f"stencil_node_test_2d_w{vector_length}")
    _, a_desc = sdfg.add_array("a", (ROWS, COLS / vector_length), dtype=element_type)
    _, b_desc = sdfg.add_array("b", (ROWS, ), dtype=DTYPE)
    sdfg.add_symbol("c", DTYPE)
    _, d_desc = sdfg.add_array("d", (ROWS, COLS / vector_length), dtype=element_type)
    _, res_desc = sdfg.add_array("res", (ROWS, COLS / vector_length), dtype=element_type)

    state = sdfg.add_state("stencil_node_test_2d")

    # Access nodes are created in the order a, b, d, res.
    input_descs = {"a": a_desc, "b": b_desc, "d": d_desc}
    read_nodes = {name: state.add_read(name) for name in input_descs}
    res_node = state.add_write("res")

    stencil_node = Stencil(
        "stencil_test",
        "res[0, 0] = c * b[0] * (a[-1, 0] + a[1, 0] + a[0, -1] + a[0, 1]) + d[0, -1] + d[0, 1]",
        iterator_mapping={"b": (True, False)})
    stencil_node.implementation = implementation
    state.add_node(stencil_node)

    # Every input is connected with a memlet spanning its whole array.
    for name, desc in input_descs.items():
        state.add_memlet_path(read_nodes[name],
                              stencil_node,
                              dst_conn=name,
                              memlet=dace.Memlet.from_array(name, desc))
    state.add_memlet_path(stencil_node,
                          res_node,
                          src_conn="res",
                          memlet=dace.Memlet.from_array("res", res_desc))

    return sdfg
def _add_optionals(self, sdfg, **kwargs):
    """Register passed-in fields/parameters whose access kind is ``NONE``.

    For each entry of ``self.stencil_object.field_info`` with access
    ``AccessKind.NONE`` that also appears in ``kwargs``, an array with the
    shape, dtype and strides of the supplied descriptor is added to ``sdfg``.
    For each such entry of ``self.stencil_object.parameter_info``, a symbol
    is added instead.

    :param sdfg: SDFG to which arrays and symbols are added.
    :param kwargs: Supplied arguments, keyed by field/parameter name.
    :return: The same ``sdfg`` object.
    """
    stencil = self.stencil_object

    for name, info in stencil.field_info.items():
        if info.access != AccessKind.NONE or name not in kwargs:
            continue
        outer_desc = kwargs[name]
        sdfg.add_array(
            name,
            shape=outer_desc.shape,
            dtype=outer_desc.dtype,
            strides=outer_desc.strides,
        )

    for name, info in stencil.parameter_info.items():
        if info.access != AccessKind.NONE or name not in kwargs:
            continue
        value = kwargs[name]
        # Scalar descriptors carry their dtype; anything else is typed from
        # its Python type.
        symbol_type = (value.dtype if isinstance(value, dace.data.Scalar) else dace.typeclass(type(value)))
        sdfg.add_symbol(name, stype=symbol_type)

    return sdfg
def __descriptor__(self) -> "dace.data.Array":
    """Return a DaCe ``Array`` descriptor describing this storage.

    * Storage is ``GPU_Global`` when the object exposes
      ``__cuda_array_interface__`` and ``CPU_Heap`` otherwise.
    * Strides are ``self.strides`` divided by ``self.itemsize``.
    * ``total_size`` is the dot product of shape and strides, divided by
      ``self.itemsize``.
    * ``start_offset`` is the dot product of ``self.default_origin`` and the
      strides, divided by ``self.itemsize``, taken modulo the ``"alignment"``
      entry of the backend's ``storage_info``.

    The returned descriptor also gets a ``default_origin`` attribute copied
    from this object.

    :return: The populated ``dace.data.Array`` descriptor.
    """
    storage = (dace.StorageType.GPU_Global
               if hasattr(self, "__cuda_array_interface__") else dace.StorageType.CPU_Heap)

    # np.dot on the 1-D sequences yields a true scalar. The previous
    # formulation built a 1x1 matrix and passed it to int(), which NumPy
    # deprecates for arrays with ndim > 0.
    start_offset = int(np.dot(self.default_origin, self.strides)) // self.itemsize
    total_size = int(np.dot(self.shape, self.strides)) // self.itemsize

    alignment = gt_backend.from_name(self.backend).storage_info["alignment"]
    start_offset = start_offset % alignment

    descriptor = dace.data.Array(
        shape=self.shape,
        strides=[s // self.itemsize for s in self.strides],
        dtype=dace.typeclass(str(self.dtype)),
        storage=storage,
        total_size=total_size,
        start_offset=start_offset,
    )
    descriptor.default_origin = self.default_origin
    return descriptor
def make_main_state(sdfg):
    """Add the "filter" state to ``sdfg`` and return it.

    The state contains three nested SDFGs (built by ``make_read_sdfg``,
    ``make_compute_sdfg`` and ``make_write_sdfg``) that are chained through
    the streams ``A_pipe``, ``B_pipe`` and ``valid_pipe``. Each stream has a
    separate access node for its producer side and its consumer side.

    :param sdfg: SDFG that receives the new state.
    :return: The newly created state.
    """
    state = sdfg.add_state("filter")

    width = W.get()
    device_buffer = {"transient": True, "storage": StorageType.FPGA_Global}
    local_stream = {
        "buffer_size": buffer_size,
        "transient": True,
        "storage": StorageType.FPGA_Local,
    }
    data_stream = dict(local_stream, dtype=dtype, veclen=width)

    def add_valid_stream():
        # Boolean stream without a vector length.
        return state.add_stream("valid_pipe", dtype=dace.typeclass(bool), **local_stream)

    def streamed(node):
        # Memlet on element "0" of a vectorized stream with "N" accesses.
        return Memlet.simple(node, "0", num_accesses="N", veclen=width)

    # Containers in global memory.
    a_mem = state.add_array("A_device", [N], dtype=dtype, **device_buffer)
    ratio_node = state.add_scalar("ratio", storage=StorageType.FPGA_Global, dtype=dtype)
    count_mem = state.add_array("outsize_device", [1], dtype=dace.uint32, **device_buffer)
    b_mem = state.add_array("B_device", [N], dtype=dtype, **device_buffer)

    # Stream access nodes: "*_consumer" feeds a nested SDFG, "*_producer"
    # receives from one.
    a_consumer = state.add_stream("A_pipe", **data_stream)
    a_producer = state.add_stream("A_pipe", **data_stream)
    b_consumer = state.add_stream("B_pipe", **data_stream)
    b_producer = state.add_stream("B_pipe", **data_stream)
    valid_consumer = add_valid_stream()
    valid_producer = add_valid_stream()

    # Nested SDFGs for the read, compute and write stages.
    reader = state.add_nested_sdfg(make_read_sdfg(), sdfg, {"A_mem"}, {"_A_pipe"})
    computer = state.add_nested_sdfg(make_compute_sdfg(), sdfg,
                                     {"_A_pipe", "ratio_nested"},
                                     {"_B_pipe", "_valid_pipe", "count"})
    writer = state.add_nested_sdfg(make_write_sdfg(), sdfg,
                                   {"_B_pipe", "_valid_pipe"}, {"B_mem"})

    # Read stage: A_device -> A_pipe.
    state.add_memlet_path(a_mem,
                          reader,
                          dst_conn="A_mem",
                          memlet=Memlet.simple(a_mem, "0:N", num_accesses="N", veclen=width))
    state.add_memlet_path(reader, a_producer, src_conn="_A_pipe", memlet=streamed(a_producer))

    # Compute stage: A_pipe, ratio -> B_pipe, valid_pipe, outsize_device.
    state.add_memlet_path(a_consumer, computer, dst_conn="_A_pipe", memlet=streamed(a_consumer))
    state.add_memlet_path(ratio_node,
                          computer,
                          dst_conn="ratio_nested",
                          memlet=Memlet.simple(ratio_node, "0"))
    state.add_memlet_path(computer, b_producer, src_conn="_B_pipe", memlet=streamed(b_producer))
    state.add_memlet_path(computer,
                          valid_producer,
                          src_conn="_valid_pipe",
                          memlet=Memlet.simple(valid_producer, "0", num_accesses="N"))
    state.add_memlet_path(computer,
                          count_mem,
                          src_conn="count",
                          memlet=Memlet.simple(count_mem, "0"))

    # Write stage: B_pipe, valid_pipe -> B_device.
    state.add_memlet_path(b_consumer, writer, dst_conn="_B_pipe", memlet=streamed(b_consumer))
    state.add_memlet_path(valid_consumer,
                          writer,
                          dst_conn="_valid_pipe",
                          memlet=Memlet.simple(valid_consumer, "0", num_accesses="N"))
    state.add_memlet_path(writer,
                          b_mem,
                          src_conn="B_mem",
                          memlet=Memlet.simple(b_mem, "0:N", num_accesses=-1, veclen=width))

    return state
def make_write_sdfg():
    """Build the "filter_write" SDFG.

    The SDFG consists of four states:

    * ``loop_begin`` sets the transient scalar ``i_write`` to 0,
    * ``loop_entry`` is the loop guard,
    * ``loop_body`` runs the "write" tasklet,
    * ``loop_end`` is reached once the loop condition no longer holds.

    The loop variable ``i`` starts at 0, is advanced by ``W`` after every
    pass through the body, and the body is entered while ``i < N + W``.

    :return: The constructed SDFG.
    """

    sdfg = SDFG("filter_write")

    loop_begin = sdfg.add_state("loop_begin")
    loop_entry = sdfg.add_state("loop_entry")
    state = sdfg.add_state("loop_body")
    loop_end = sdfg.add_state("loop_end")

    # Initialize the write index to zero before entering the loop.
    i_write_zero = loop_begin.add_scalar(
        "i_write",
        dtype=dace.types.uint32,
        transient=True,
        storage=StorageType.FPGA_Registers)
    zero_tasklet = loop_begin.add_tasklet("zero", {}, {"i_write_out"},
                                          "i_write_out = 0")
    loop_begin.add_memlet_path(
        zero_tasklet,
        i_write_zero,
        src_conn="i_write_out",
        memlet=Memlet.simple(i_write_zero, "0"))

    # Loop control flow: i = 0; while i < N + W: body; i += W.
    # NOTE(review): the bound is N + W rather than N -- confirm this is
    # intended.
    sdfg.add_edge(
        loop_begin, loop_entry,
        dace.graph.edges.InterstateEdge(assignments={"i": 0}))

    sdfg.add_edge(
        loop_entry, state,
        dace.graph.edges.InterstateEdge(
            condition=dace.properties.CodeProperty.from_string(
                "i < N + W", language=dace.types.Language.Python)))

    sdfg.add_edge(
        loop_entry, loop_end,
        dace.graph.edges.InterstateEdge(
            condition=dace.properties.CodeProperty.from_string(
                "i >= N + W", language=dace.types.Language.Python)))

    sdfg.add_edge(
        state, loop_entry,
        dace.graph.edges.InterstateEdge(assignments={"i": "i + W"}))

    # Containers accessed in the loop body. B_mem and the two streams are
    # not marked transient here.
    B = state.add_array(
        "B_mem", [N], dtype=dtype, storage=StorageType.FPGA_Global)
    B_pipe = state.add_stream(
        "_B_pipe",
        dtype=dtype,
        buffer_size=buffer_size,
        veclen=W.get(),
        storage=StorageType.FPGA_Local)
    valid_pipe = state.add_stream(
        "_valid_pipe",
        dtype=dace.typeclass(bool),
        buffer_size=buffer_size,
        storage=StorageType.FPGA_Local)
    # Two access nodes for the same "i_write" scalar: one is read by the
    # tasklet, the other receives the updated value.
    i_write_in = state.add_scalar(
        "i_write",
        dtype=dace.types.uint32,
        transient=True,
        storage=StorageType.FPGA_Registers)
    i_write_out = state.add_scalar(
        "i_write",
        dtype=dace.types.uint32,
        transient=True,
        storage=StorageType.FPGA_Registers)

    # When valid_in is set, b_in is stored at index i_write_in of b_out and
    # the index is incremented; otherwise the index is passed through
    # unchanged.
    tasklet = state.add_tasklet(
        "write", {"b_in", "valid_in", "i_write_in"}, {"b_out", "i_write_out"},
        "if valid_in:"
        "\n\tb_out[i_write_in] = b_in"
        "\n\ti_write_out = i_write_in + 1"
        "\nelse:"
        "\n\ti_write_out = i_write_in")

    # Tasklet inputs.
    state.add_memlet_path(
        B_pipe,
        tasklet,
        dst_conn="b_in",
        memlet=Memlet.simple(B_pipe, "0", veclen=W.get()))
    state.add_memlet_path(
        valid_pipe,
        tasklet,
        dst_conn="valid_in",
        memlet=Memlet.simple(valid_pipe, "0"))
    state.add_memlet_path(
        i_write_in,
        tasklet,
        dst_conn="i_write_in",
        memlet=Memlet.simple(i_write_in, "0"))

    # Tasklet outputs.
    state.add_memlet_path(
        tasklet,
        i_write_out,
        src_conn="i_write_out",
        memlet=Memlet.simple(i_write_out, "0"))
    # The third positional argument of Memlet.simple is passed W.get();
    # presumably this is the vector length -- verify against the
    # Memlet.simple signature in use.
    state.add_memlet_path(
        tasklet,
        B,
        src_conn="b_out",
        memlet=Memlet.simple(B, "0:N", W.get()))

    return sdfg
def dace_dtype_to_typestr(dtype: Any):
    """Return the NumPy type string for ``dtype``.

    ``dtype`` is wrapped in ``dace.typeclass`` unless it already is one; the
    result is the ``str`` attribute of the corresponding NumPy dtype.
    """
    typeclass = dtype if isinstance(dtype, dace.typeclass) else dace.typeclass(dtype)
    numpy_dtype = typeclass.as_numpy_dtype()
    return numpy_dtype.str