def stencil_2d_sparse_bls(arch, repeat, scatter, bls, container, dtype,
                          dsize_2d, get_metric):
    """Benchmark a 2D stencil on a pointer/dense (8x8 block) field layout.

    Args:
        arch: Forwarded to ``scaled_repeat_times``.
        repeat: Ignored; it is overwritten with
            ``scaled_repeat_times(arch, dsize, 1)`` (sparse-specific base
            repeat count of 1).
        scatter: Compile-time switch. When true the kernel scatters ``x[I]``
            into ``y[I + offset]``; otherwise it gathers ``x[I + offset]``
            into ``y[I]``.
        bls: Compile-time switch. When true, ``ti.block_local`` is requested
            on ``x`` (gather) or on ``y`` (scatter).
        container: Only forwarded to ``fill_random``; the data itself always
            lives in ``ti.field`` objects placed under the sparse layout.
        dtype: Element type of both fields.
        dsize_2d: Pair ``(d0, d1)``. The pointer grid has shape
            ``(d0 // dtype_size(dtype) // 8, d1 // 2 // 8)`` and every cell
            holds a dense 8x8 block.
        get_metric: Callable invoked as ``get_metric(repeat, kernel, y, x)``.

    Returns:
        ``None`` when ``d0 * d1`` is outside the supported window, otherwise
        whatever ``get_metric`` returns.
    """
    dsize = dsize_2d[0] * dsize_2d[1]
    # Sparse-specific size window: only 4096 < dsize <= 67108864 (64 * 2**20)
    # is benchmarked; anything else is skipped.
    if dsize <= 4096 or dsize > 67108864:
        return None
    repeat = scaled_repeat_times(
        arch, dsize, 1)  # basic_repeat_time = 1: Sparse-specific parameters
    block_elements_2d = (dsize_2d[0] // dtype_size(dtype) // 8,
                         dsize_2d[1] // 2 // 8)

    block = ti.root.pointer(ti.ij, block_elements_2d)
    y = ti.field(dtype)
    x = ti.field(dtype)
    block.dense(ti.ij, 8).place(y)
    block.dense(ti.ij, 8).place(x)

    @ti.kernel
    def active_all():
        # Activate every cell of the pointer grid. The second extent must be
        # block_elements_2d[1]; reusing [0] for both axes leaves cells
        # inactive (or indexes out of range) whenever the extents differ.
        for i, j in ti.ndrange(block_elements_2d[0], block_elements_2d[1]):
            ti.activate(block, [i, j])

    active_all()

    @ti.kernel
    def stencil_2d(y: ti.template(), x: ti.template()):
        #reference: tests/python/bls_test_template.py
        if ti.static(bls and not scatter):
            ti.block_local(x)
        if ti.static(bls and scatter):
            ti.block_local(y)
        ti.block_dim(64)  # 8*8=64, one thread per element of a dense block
        for I in ti.grouped(x):
            if ti.static(scatter):
                for offset in ti.static(stencil_common):
                    y[I + ti.Vector(offset)] += x[I]
            else:  # gather
                s = ti.cast(0.0, dtype)
                for offset in ti.static(stencil_common):
                    s = s + x[I + ti.Vector(offset)]
                y[I] = s

    fill_random(x, dtype, container)
    return get_metric(repeat, stencil_2d, y, x)
def memcpy_default(arch, repeat, container, dtype, dsize, get_metric):
    """Benchmark an element-wise copy ``y = x`` between two 1D containers.

    Args:
        arch: Forwarded to ``scaled_repeat_times``.
        repeat: Base repeat count, rescaled by ``scaled_repeat_times``.
        container: Factory called as ``container(dtype, n)``; when it equals
            ``ti.field`` the template kernel is used, otherwise the ndarray
            kernel.
        dtype: Element type of both buffers.
        dsize: Total size budget; each buffer gets
            ``dsize // dtype_size(dtype) // 2`` elements.
        get_metric: Callable invoked as ``get_metric(repeat, kernel, y, x)``.

    Returns:
        Whatever ``get_metric`` returns.
    """
    repeat = scaled_repeat_times(arch, dsize, repeat)

    # Two equally sized buffers (source and destination) share the budget.
    total_elements = dsize // dtype_size(dtype)
    per_buffer = total_elements // 2
    x = container(dtype, per_buffer)
    y = container(dtype, per_buffer)

    @ti.kernel
    def memcpy_field(target: ti.template(), source: ti.template()):
        for idx in ti.grouped(target):
            target[idx] = source[idx]

    @ti.kernel
    def memcpy_array(target: ti.types.ndarray(), source: ti.types.ndarray()):
        for idx in ti.grouped(target):
            target[idx] = source[idx]

    # Fields are passed as templates, everything else as ndarrays.
    if container == ti.field:
        func = memcpy_field
    else:
        func = memcpy_array

    fill_random(x, dtype, container)
    return get_metric(repeat, func, y, x)
def reduction_default(arch, repeat, atomic_op, container, dtype, dsize,
                      get_metric):
    """Benchmark reducing a 1D container into a 0-d accumulator.

    Args:
        arch: Forwarded to ``scaled_repeat_times``.
        repeat: Base repeat count, rescaled by ``scaled_repeat_times``.
        atomic_op: Callable applied inside the kernel as
            ``atomic_op(y[None], x[i])`` for every element.
        container: Factory called as ``container(dtype, shape=...)``; when it
            equals ``ti.field`` the template kernel is used, otherwise the
            ndarray kernel.
        dtype: Element type of the input and of the accumulator.
        dsize: Size budget; the input holds ``dsize // dtype_size(dtype)``
            elements.
        get_metric: Callable invoked as ``get_metric(repeat, kernel, y, x)``.

    Returns:
        Whatever ``get_metric`` returns.
    """

    @ti.kernel
    def reduction_field(acc: ti.template(), values: ti.template()):
        for k in values:
            atomic_op(acc[None], values[k])

    @ti.kernel
    def reduction_array(acc: ti.types.ndarray(), values: ti.types.ndarray()):
        for k in values:
            atomic_op(acc[None], values[k])

    repeat = scaled_repeat_times(arch, dsize, repeat)

    element_count = dsize // dtype_size(dtype)
    x = container(dtype, shape=element_count)
    # 0-d container that receives the reduction result; starts at zero.
    y = container(dtype, shape=())
    y[None] = 0

    fill_random(x, dtype, container)

    if container == ti.field:
        func = reduction_field
    else:
        func = reduction_array
    return get_metric(repeat, func, y, x)
def saxpy_default(arch, repeat, container, dtype, dsize, get_metric):
    """Benchmark the element-wise update ``z = 17 * x + y``.

    Args:
        arch: Forwarded to ``scaled_repeat_times``.
        repeat: Base repeat count, rescaled by ``scaled_repeat_times``.
        container: Factory called as ``container(dtype, n)``; when it equals
            ``ti.field`` the template kernel is used, otherwise the ndarray
            kernel.
        dtype: Element type of all three buffers.
        dsize: Size budget; each of ``x``, ``y`` and ``z`` gets
            ``dsize // dtype_size(dtype) // 3`` elements.
        get_metric: Callable invoked as
            ``get_metric(repeat, kernel, z, x, y)``.

    Returns:
        Whatever ``get_metric`` returns.
    """
    repeat = scaled_repeat_times(arch, dsize, repeat)
    # Three equally sized buffers share the budget: z = 17 * x + y.
    num_elements = dsize // dtype_size(dtype) // 3
    x = container(dtype, num_elements)
    y = container(dtype, num_elements)
    z = container(dtype, num_elements)

    @ti.kernel
    def saxpy_field(z: ti.template(), x: ti.template(), y: ti.template()):
        for i in z:
            z[i] = 17 * x[i] + y[i]

    # ti.types.ndarray() replaces the legacy ti.any_arr() alias and matches
    # the annotation used by the other ndarray kernels in this file.
    @ti.kernel
    def saxpy_array(z: ti.types.ndarray(), x: ti.types.ndarray(),
                    y: ti.types.ndarray()):
        for i in z:
            z[i] = 17 * x[i] + y[i]

    fill_random(x, dtype, container)
    fill_random(y, dtype, container)
    func = saxpy_field if container == ti.field else saxpy_array
    return get_metric(repeat, func, z, x, y)
def stencil_2d_default(arch, repeat, scatter, bls, container, dtype, dsize_2d,
                       get_metric):
    """Benchmark a 2D stencil on dense containers.

    Args:
        arch: Forwarded to ``scaled_repeat_times``.
        repeat: Base repeat count, rescaled by ``scaled_repeat_times``.
        scatter: Compile-time switch. When true the kernel scatters ``x[I]``
            into ``y[I + offset]``; otherwise it gathers ``x[I + offset]``
            into ``y[I]``.
        bls: Unused by this variant.
        container: Factory called as ``container(dtype, shape=...)``; when it
            equals ``ti.field`` the template kernel is used, otherwise the
            ndarray kernel.
        dtype: Element type of both containers.
        dsize_2d: Pair ``(d0, d1)``; both containers have shape
            ``(d0 // dtype_size(dtype), d1 // 2)``.
        get_metric: Callable invoked as ``get_metric(repeat, kernel, y, x)``.

    Returns:
        Whatever ``get_metric`` returns.
    """
    dsize = dsize_2d[0] * dsize_2d[1]
    repeat = scaled_repeat_times(arch, dsize, repeat)
    num_elements_2d = (dsize_2d[0] // dtype_size(dtype), dsize_2d[1] // 2)

    y = container(dtype, shape=num_elements_2d)
    x = container(dtype, shape=num_elements_2d)

    @ti.kernel
    def stencil_2d_field(y: ti.template(), x: ti.template()):
        for I in ti.grouped(x):
            if ti.static(scatter):
                for offset in ti.static(stencil_common):
                    y[I + ti.Vector(offset)] += x[I]
            else:  # gather
                s = ti.cast(0.0, dtype)
                for offset in ti.static(stencil_common):
                    s = s + x[I + ti.Vector(offset)]
                y[I] = s

    # ti.types.ndarray() replaces the legacy ti.any_arr() alias and matches
    # the annotation used by the other ndarray kernels in this file.
    @ti.kernel
    def stencil_2d_array(y: ti.types.ndarray(), x: ti.types.ndarray()):
        for I in ti.grouped(x):
            if ti.static(scatter):
                for offset in ti.static(stencil_common):
                    y[I + ti.Vector(offset)] += x[I]
            else:  # gather
                s = ti.cast(0.0, dtype)
                for offset in ti.static(stencil_common):
                    s = s + x[I + ti.Vector(offset)]
                y[I] = s

    fill_random(x, dtype, container)
    func = stencil_2d_field if container == ti.field else stencil_2d_array
    return get_metric(repeat, func, y, x)