def test_fusion_range():
    """Launch foo, bar, foo over range-for loops in async mode, then sync.

    Fusion is switched off via ``async_opt_fusion=False`` and the
    intermediate file name is set to "fusion_range".
    """
    ti.init(
        arch=ti.cpu,
        async_mode=True,
        async_opt_fusion=False,
        async_opt_intermediate_file="fusion_range",
    )

    x = ti.field(ti.i32)
    y = ti.field(ti.i32)
    z = ti.field(ti.i32)

    n = 128
    # All three fields share one dense block of n cells.
    ti.root.dense(ti.i, n).place(x, y, z)

    @ti.kernel
    def foo():
        for i in range(n):
            y[i] = x[i] + 1

    @ti.kernel
    def bar():
        for i in range(n):
            z[i] = y[i] + 1

    for launch in (foo, bar, foo):
        launch()
    ti.sync()
def test_nested_struct_fill_and_clear():
    """Fill a pointer->dense field, deactivate it, and repeat ten times."""
    a = ti.var(dt=ti.f32)
    N = 512

    @ti.layout
    def place():
        pointer_level = ti.root.pointer(ti.ij, [N, N])
        pointer_level.dense(ti.ij, [8, 8]).place(a)

    @ti.kernel
    def fill():
        for i, j in ti.ndrange(N * 8, N * 8):
            a[i, j] = 2.0

    @ti.kernel
    def clear():
        for i, j in a.parent():
            ti.deactivate(a.parent().parent(), [i, j])

    # One round is a fill followed by a clear, then a sync.
    for _ in range(10):
        fill()
        clear()
        ti.sync()
def test_write_after_read():
    """Interleave a reading kernel and a writing kernel in async mode.

    DSE, listgen and fusion optimizations are all disabled, and the
    intermediate file name is set to "war".
    """
    # TODO: @xumingkuan fusion on this case fails
    ti.init(
        arch=ti.cpu,
        async_mode=True,
        async_opt_dse=False,
        async_opt_listgen=False,
        async_opt_fusion=False,
        async_opt_intermediate_file="war",
    )

    x = ti.field(ti.i32, shape=16)

    @ti.kernel
    def p():
        print(x[ti.random(ti.i32) % 16])

    @ti.kernel
    def s():
        x[ti.random(ti.i32) % 16] = 3

    # Launch order: three reads, a write, two reads, three writes.
    for launch in (p, p, p, s, p, p, s, s, s):
        launch()
    ti.sync()
def run_benchmark():
    """Time one compile+run of ``func`` and its averaged steady-state run.

    Uses ``func``, ``args`` and ``repeat`` from the enclosing scope. Writes
    the following stats through ``ti.stat_write``:

    * ``compilation_time`` -- wall time of the first ``func(*args)`` call.
    * ``instructions``, ``offloaded_tasks``, ``launched_kernels`` -- copied
      from the matching ``key: value`` lines of ``ti.core.stat()``.
    * ``running_time`` -- average wall time of ``repeat`` further calls.
    """
    # Maps a key reported by ti.core.stat() to the name it is written under.
    stat_names = {
        'codegen_kernel_statements': 'instructions',
        'codegen_offloaded_tasks': 'offloaded_tasks',
        'launched_kernels': 'launched_kernels',
    }

    # perf_counter is monotonic, so the intervals cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    func(*args)
    compile_time = time.perf_counter() - start
    ti.stat_write('compilation_time', compile_time)

    for line in ti.core.stat().split('\n'):
        try:
            key, raw_value = line.strip().split(':')
        except ValueError:
            # Not exactly one "key: value" pair on this line; skip it.
            continue
        stat_name = stat_names.get(key.strip())
        if stat_name is None:
            # Only convert values we actually record, so an unrelated
            # non-numeric stat cannot abort the benchmark.
            continue
        ti.stat_write(stat_name, int(float(raw_value)))

    # The reason why we run 4 times is to warm up instruction/data caches.
    # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136
    for _ in range(4):
        func(*args)  # compile the kernel first
    ti.sync()

    start = time.perf_counter()
    for _ in range(repeat):
        func(*args)
    ti.get_runtime().sync()
    elapsed = time.perf_counter() - start
    ti.stat_write('running_time', elapsed / repeat)
def to_numpy(self, keep_dims=False, as_vector=None, dtype=None):
    """Copy this matrix field into a freshly allocated numpy array.

    Args:
        keep_dims (bool, optional): When False and the matrix has a single
            column (``self.m == 1``), the result carries only one trailing
            axis of length ``self.n``. Otherwise the result carries two
            trailing axes ``(self.n, self.m)``.
        as_vector (bool, deprecated): Ignored apart from triggering a
            deprecation warning when it is not None; the effective value is
            always recomputed from ``self.m`` and ``keep_dims``.
            More discussion about `as_vector`:
            https://github.com/taichi-dev/taichi/pull/1046#issuecomment-633548858.
        dtype (DataType, optional): Element type of the returned array.
            Defaults to ``to_numpy_type(self.dtype)``.

    Returns:
        numpy.ndarray: Array of shape ``self.shape`` followed by the
        trailing matrix axes described above.
    """
    if as_vector is not None:
        warning(
            'v.to_numpy(as_vector=True) is deprecated, '
            'please use v.to_numpy() directly instead',
            DeprecationWarning,
            stacklevel=3)

    element_type = to_numpy_type(self.dtype) if dtype is None else dtype

    # The caller-supplied flag is discarded; only the layout decides.
    as_vector = self.m == 1 and not keep_dims
    if as_vector:
        matrix_axes = (self.n, )
    else:
        matrix_axes = (self.n, self.m)

    import numpy as np
    from taichi.lang.meta import matrix_to_ext_arr

    arr = np.zeros(self.shape + matrix_axes, dtype=element_type)
    matrix_to_ext_arr(self, arr, as_vector)
    ti.sync()
    return arr
def test_fusion():
    """Run two struct-for kernels in async mode and dump the state flow graph.

    After syncing, the graph is printed, dumped via ``ti.dump_dot`` with
    "fusion.dot", and rendered to "fusion.pdf".
    """
    ti.init(arch=ti.cpu, async_mode=True)

    x = ti.field(ti.i32)
    y = ti.field(ti.i32)
    z = ti.field(ti.i32)

    # A pointer root with `num_dense_layers` dense levels stacked below it.
    num_dense_layers = 1
    block = ti.root.pointer(ti.i, 128)
    for _ in range(num_dense_layers):
        block = block.dense(ti.i, 2)
    block.place(x, y, z)

    @ti.kernel
    def foo():
        for i in x:
            y[i] = x[i] + 1

    @ti.kernel
    def bar():
        for i in y:
            z[i] = y[i] + 1

    for launch in (foo, bar):
        launch()
    ti.sync()

    ti.core.print_sfg()
    dot = ti.dump_dot("fusion.dot")
    print(dot)
    ti.dot_to_pdf(dot, "fusion.pdf")
def test_write_after_read():
    """Interleave reads and writes in async mode and dump the graph.

    After syncing, the state flow graph is printed, dumped via
    ``ti.dump_dot`` with "war.dot", and rendered to "war.pdf".
    """
    ti.init(arch=ti.cpu, async_mode=True)

    x = ti.field(ti.i32, shape=16)

    @ti.kernel
    def p():
        print(x[ti.random(ti.i32) % 16])

    @ti.kernel
    def s():
        x[ti.random(ti.i32) % 16] = 3

    # Launch order: three reads, a write, two reads, three writes.
    for launch in (p, p, p, s, p, p, s, s, s):
        launch()
    ti.sync()

    ti.core.print_sfg()
    dot = ti.dump_dot("war.dot")
    print(dot)
    ti.dot_to_pdf(dot, "war.pdf")
def test_fuse_allocator_state():
    """Activate cells of `y` and then deactivate a range of its parent.

    `activate_y` writes ``y[i + 1] = i + 1`` for every ``i`` in `x`;
    `deactivate_y` deactivates ``y_parent`` at every ``i`` in `x`. The
    assertions expect only index ``N`` to still hold a non-zero value.
    """
    N = 16
    x = ti.field(dtype=ti.i32, shape=N)
    y = ti.field(dtype=ti.i32)
    y_parent = ti.root.pointer(ti.i, N * 2)
    y_parent.place(y)

    # https://github.com/taichi-dev/taichi/pull/1973#pullrequestreview-511154376
    @ti.kernel
    def activate_y():
        for i in x:
            idx = i + 1
            y[idx] = idx

    @ti.kernel
    def deactivate_y():
        for i in x:
            ti.deactivate(y_parent, i)

    activate_y()
    deactivate_y()
    ti.sync()

    # TODO: assert that activate_y and deactivate_y are not fused.
    assert y_parent._num_dynamically_allocated == N

    ys = y.to_numpy()
    # Use a distinct loop name: rebinding `y` here would overwrite the field
    # that the `activate_y` kernel captures from this scope.
    for i, value in enumerate(ys):
        expected = N if i == N else 0
        assert value == expected
def test_multi_print():
    """Print an int argument, a float literal and a float argument in one call."""

    @ti.kernel
    def func(x: ti.i32, y: ti.f32):
        print(x, 1234.5, y)

    int_arg, float_arg = 666, 233.3
    func(int_arg, float_arg)
    ti.sync()
def parallel_sort(keys, values=None):
    """Reorder `keys` in place using repeated compare-and-swap stages.

    Each stage compares pairs ``(a, a + k)`` and swaps them when
    ``keys[a] > keys[a + k]``. When `values` is given, the same swaps are
    applied to it so it stays aligned with `keys`.

    NOTE(review): the ``p``/``k`` stepping looks like an odd-even merge
    sorting network -- confirm before relying on that name.

    Args:
        keys: Taichi field whose first dimension (``keys.shape[0]``) is
            sorted over.
        values: Optional field permuted alongside `keys`.
    """
    N = keys.shape[0]

    @ti.kernel
    def sort_stage(keys: ti.template(), use_values: int, values: ti.template(),
                   N: int, p: int, k: int, invocations: int):
        for inv in range(invocations):
            j = k % p + inv * 2 * k
            for i in range(0, min(k, N - j - k)):
                a = i + j
                b = i + j + k
                if int(a / (p * 2)) == int(b / (p * 2)):
                    key_a = keys[a]
                    key_b = keys[b]
                    if key_a > key_b:
                        keys[a] = key_b
                        keys[b] = key_a
                        if use_values != 0:
                            temp = values[a]
                            values[a] = values[b]
                            values[b] = temp

    # The kernel always needs a field for `values`; pass `keys` as a
    # placeholder when there is nothing to permute (it is never touched
    # because `use_values` is 0).
    use_values = 0 if values is None else 1
    payload = keys if values is None else values

    # Host-side stepping uses integer floor division so no value takes a
    # round trip through float.
    p = 1
    while p < N:
        k = p
        while k >= 1:
            invocations = (N - k - k % p) // (2 * k) + 1
            sort_stage(keys, use_values, payload, N, p, k, invocations)
            ti.sync()
            k //= 2
        p *= 2
def to_numpy(self):
    """Return a new numpy array holding a copy of this field's contents.

    The array has shape ``self.shape`` and dtype
    ``to_numpy_type(self.dtype)``.
    """
    import numpy as np
    from taichi.lang.meta import tensor_to_ext_arr

    element_type = to_numpy_type(self.dtype)
    arr = np.zeros(shape=self.shape, dtype=element_type)
    tensor_to_ext_arr(self, arr)
    # Sync before handing the array back to the caller.
    ti.sync()
    return arr
def test_fusion():
    """Run two struct-for kernels in async mode with fusion disabled.

    The intermediate file name is set to "fusion".
    """
    ti.init(
        arch=ti.cpu,
        async_mode=True,
        async_opt_intermediate_file="fusion",
        async_opt_fusion=False,
    )

    x = ti.field(ti.i32)
    y = ti.field(ti.i32)
    z = ti.field(ti.i32)

    # A pointer root with `num_dense_layers` dense levels stacked below it.
    num_dense_layers = 1
    block = ti.root.pointer(ti.i, 128)
    for _ in range(num_dense_layers):
        block = block.dense(ti.i, 2)
    block.place(x, y, z)

    @ti.kernel
    def foo():
        for i in x:
            y[i] = x[i] + 1

    @ti.kernel
    def bar():
        for i in y:
            z[i] = y[i] + 1

    for launch in (foo, bar):
        launch()
    ti.sync()
def to_torch(self, device=None):
    """Return a new torch tensor holding a copy of this field's contents.

    Args:
        device: Forwarded to ``torch.zeros`` as the ``device`` argument.

    Returns:
        A tensor of shape ``self.shape`` and dtype
        ``to_pytorch_type(self.dtype)``.
    """
    import torch  # pylint: disable=C0415

    element_type = to_pytorch_type(self.dtype)
    arr = torch.zeros(size=self.shape, dtype=element_type, device=device)
    taichi.lang.meta.tensor_to_ext_arr(self, arr)
    # Sync before handing the tensor back to the caller.
    ti.sync()
    return arr
def to_numpy(self):
    """Return a new numpy array holding a copy of this field's contents.

    The array has shape ``self.shape`` and dtype
    ``to_numpy_type(self.dtype)``.
    """
    from .meta import tensor_to_ext_arr
    import numpy as np
    import taichi as ti

    element_type = to_numpy_type(self.dtype)
    arr = np.zeros(shape=self.shape, dtype=element_type)
    tensor_to_ext_arr(self, arr)
    # Sync before handing the array back to the caller.
    ti.sync()
    return arr
def to_numpy(self, dtype=None):
    """Return a new numpy array holding a copy of this field's contents.

    Args:
        dtype: Element type of the returned array. Defaults to
            ``to_numpy_type(self.dtype)`` when None.

    Returns:
        numpy.ndarray: Array of shape ``self.shape``.
    """
    import numpy as np  # pylint: disable=C0415

    element_type = to_numpy_type(self.dtype) if dtype is None else dtype
    arr = np.zeros(shape=self.shape, dtype=element_type)
    taichi.lang.meta.tensor_to_ext_arr(self, arr)
    # Sync before handing the array back to the caller.
    ti.sync()
    return arr
def to_numpy(self, dtype=None):
    """Return a new numpy array holding a copy of this field's contents.

    Args:
        dtype: Element type of the returned array. Defaults to
            ``to_numpy_type(self.dtype)`` when None.

    Returns:
        numpy.ndarray: Array of shape ``self.shape``.
    """
    element_type = to_numpy_type(self.dtype) if dtype is None else dtype

    import numpy as np
    from taichi.lang.meta import tensor_to_ext_arr

    arr = np.zeros(shape=self.shape, dtype=element_type)
    tensor_to_ext_arr(self, arr)
    # Sync before handing the array back to the caller.
    ti.sync()
    return arr
def test_print_string():
    """Print string literals mixed with kernel arguments from a kernel."""

    @ti.kernel
    def func(x: ti.i32, y: ti.f32):
        # make sure `%` doesn't break vprintf:
        print('hello, world! %s %d %f', 233, y)
        print('cool', x, 'well', y)

    int_arg, float_arg = 666, 233.3
    func(int_arg, float_arg)
    ti.sync()
def from_numpy(self, arr):
    """Copy the contents of `arr` into this field.

    Args:
        arr: Source array whose ``shape`` must match ``self.shape`` in rank
            and in every dimension. If it exposes ``contiguous()``, that is
            called first and its result is copied instead.

    Raises:
        AssertionError: If the shapes disagree.
    """
    own_shape = self.shape
    assert len(own_shape) == len(arr.shape)
    for axis, extent in enumerate(own_shape):
        assert extent == arr.shape[axis]

    if hasattr(arr, 'contiguous'):
        arr = arr.contiguous()

    from taichi.lang.meta import ext_arr_to_tensor
    ext_arr_to_tensor(arr, self)
    ti.sync()
def to_numpy(self):
    """Return a new numpy array holding a copy of this tensor's contents.

    The array has shape ``self.shape()`` and dtype
    ``to_numpy_type(self.snode().data_type())``.
    """
    from .meta import tensor_to_ext_arr
    import numpy as np
    import taichi as ti

    element_type = to_numpy_type(self.snode().data_type())
    # np.empty leaves the buffer uninitialized; the copy kernel fills it.
    arr = np.empty(shape=self.shape(), dtype=element_type)
    tensor_to_ext_arr(self, arr)
    ti.sync()
    return arr
def test_print(dt):
    """Print the literal 1234.5 cast to `dt` from inside a kernel."""

    @ti.kernel
    def func():
        print(ti.cast(1234.5, dt))

    func()

    # Discussion: https://github.com/taichi-dev/taichi/issues/1063#issuecomment-636421904
    # Synchronize to prevent cross-test failure of print:
    ti.sync()
def to_torch(self, device=None):
    """Return a new torch tensor holding a copy of this field's contents.

    Args:
        device: Forwarded to ``torch.zeros`` as the ``device`` argument.

    Returns:
        A tensor of shape ``self.shape`` and dtype
        ``to_pytorch_type(self.dtype)``.
    """
    import torch
    from taichi.lang.meta import tensor_to_ext_arr

    element_type = to_pytorch_type(self.dtype)
    arr = torch.zeros(size=self.shape, dtype=element_type, device=device)
    tensor_to_ext_arr(self, arr)
    # Sync before handing the tensor back to the caller.
    ti.sync()
    return arr
def to_torch(self, device=None):
    """Return a new torch tensor holding a copy of this tensor's contents.

    Args:
        device: Forwarded to ``torch.empty`` as the ``device`` argument.

    Returns:
        A tensor of shape ``self.shape()`` and dtype
        ``to_pytorch_type(self.snode().data_type())``.
    """
    from .meta import tensor_to_ext_arr
    import torch
    import taichi as ti

    element_type = to_pytorch_type(self.snode().data_type())
    # torch.empty leaves the buffer uninitialized; the copy kernel fills it.
    arr = torch.empty(size=self.shape(), dtype=element_type, device=device)
    tensor_to_ext_arr(self, arr)
    ti.sync()
    return arr
def set_image(self, img):
    """Draw an image on canvas.

    Args:
        img (Union[ti.field, numpy.array]): The color array representing the
            image to be drawn. Scalar fields, vector fields with 2, 3 or 4
            components, and numpy arrays are accepted (only 3- or
            4-component vector fields when ``fast_gui`` is on). Field shapes
            must match the GUI resolution on the optimized copy paths.

    Raises:
        ValueError: If `img` is neither a Taichi field nor a numpy array
            (non-fast path only).
        AssertionError: If a shape, component count or dtype check fails.
    """
    if self.fast_gui:
        # Fast path: copy straight from a vector field and skip the
        # set_img call at the bottom.
        assert isinstance(img, taichi.lang.matrix.MatrixField), \
            "Only ti.Vector.field is supported in GUI.set_image when fast_gui=True"
        assert img.shape == self.res, \
            "Image resolution does not match GUI resolution"
        assert img.n in (3, 4) and img.m == 1, \
            "Only RGB images are supported in GUI.set_image when fast_gui=True"
        assert img.dtype in (ti.f32, ti.f64, ti.u8), \
            "Only f32, f64, u8 are supported in GUI.set_image when fast_gui=True"
        taichi.lang.meta.vector_to_fast_image(img, self.img)
        return

    if isinstance(img, ScalarField):
        needs_cooking = (_ti_core.is_integral(img.dtype)
                         or len(img.shape) != 2)
        if needs_cooking:
            # Images of uint is not optimized by xxx_to_image
            self.img = self.cook_image(img.to_numpy())
        else:
            # Type matched! We can use an optimized copy kernel.
            assert img.shape == self.res, \
                "Image resolution does not match GUI resolution"
            taichi.lang.meta.tensor_to_image(img, self.img)
            ti.sync()

    elif isinstance(img, taichi.lang.matrix.MatrixField):
        if _ti_core.is_integral(img.dtype):
            self.img = self.cook_image(img.to_numpy())
        else:
            # Type matched! We can use an optimized copy kernel.
            assert img.shape == self.res, \
                "Image resolution does not match GUI resolution"
            assert img.n in (2, 3, 4) and img.m == 1, \
                "Only greyscale, RG, RGB or RGBA images are supported in GUI.set_image"
            taichi.lang.meta.vector_to_image(img, self.img)
            ti.sync()

    elif isinstance(img, np.ndarray):
        self.img = self.cook_image(img)

    else:
        raise ValueError(
            f"GUI.set_image only takes a Taichi field or NumPy array, not {type(img)}"
        )

    self.core.set_img(self.img.ctypes.data)
def from_numpy(self, arr):
    """Copy the contents of `arr` into this field.

    Args:
        arr: Source array whose ``shape`` must match ``self.shape`` in rank
            and in every dimension. If it exposes ``contiguous()``, that is
            called first and its result is copied instead.

    Raises:
        AssertionError: If the shapes disagree.
    """
    assert len(self.shape) == len(arr.shape)
    own_shape = self.shape
    for axis in range(len(self.shape)):
        assert own_shape[axis] == arr.shape[axis]

    from .meta import ext_arr_to_tensor

    source = arr.contiguous() if hasattr(arr, 'contiguous') else arr
    ext_arr_to_tensor(source, self)
    ti.sync()
def to_torch(self, device=None):
    """Return a new torch tensor holding a copy of this field's contents.

    Args:
        device: Forwarded to ``torch.zeros`` as the ``device`` argument.

    Returns:
        A tensor of shape ``self.shape`` and dtype
        ``to_pytorch_type(self.dtype)``.
    """
    from .meta import tensor_to_ext_arr
    import torch
    import taichi as ti

    element_type = to_pytorch_type(self.dtype)
    arr = torch.zeros(size=self.shape, dtype=element_type, device=device)
    tensor_to_ext_arr(self, arr)
    # Sync before handing the tensor back to the caller.
    ti.sync()
    return arr
def test_print_fstring():
    """Print an f-string from a kernel.

    The f-string mixes calls to a Python helper, kernel arguments and
    literals.
    """

    def foo1(x):
        return x + 1

    @ti.kernel
    def func(i: ti.i32, f: ti.f32):
        print(f'qwe {foo1(1)} {foo1(2) * 2 - 1} {i} {f} {4} {True} {1.23}')

    int_arg, float_arg = 123, 4.56
    func(int_arg, float_arg)
    ti.sync()
def test_remove_clear_list_from_fused_serial():
    """Check launch counters and results when a serial kernel sits between struct-fors.

    `x` is written at odd indices and `y` at even indices, then both are
    incremented twice with a serial write to `z` in between. Kernel stats
    are cleared after initialization, so the counters cover only the
    increment and serial launches.
    """
    x = ti.field(ti.i32)
    y = ti.field(ti.i32)
    z = ti.field(ti.i32, shape=())

    n = 32
    # x and y live in separate pointer -> dense trees.
    for sparse_field in (x, y):
        ti.root.pointer(ti.i, n).dense(ti.i, 1).place(sparse_field)

    @ti.kernel
    def init_xy():
        for i in range(n):
            if i & 1:
                x[i] = i
            else:
                y[i] = i

    init_xy()
    ti.sync()

    stats = ti.get_kernel_stats()
    stats.clear()

    @ti.kernel
    def inc(f: ti.template()):
        for i in f:
            f[i] += 1

    @ti.kernel
    def serial_z():
        z[None] = 40
        z[None] += 2

    inc(x)
    inc(y)
    serial_z()
    inc(x)
    inc(y)
    ti.sync()

    counters = stats.get_counters()
    # each of x and y has two listgens: root -> pointer -> dense
    assert int(counters['launched_tasks_list_gen']) == 4
    # clear list tasks have been fused into serial_z
    assert int(counters['launched_tasks_serial']) == 1

    xs = x.to_numpy()
    ys = y.to_numpy()
    for idx in range(n):
        # Odd indices were initialized in x, even indices in y; the other
        # field is expected to stay zero at that index.
        written, untouched = (xs, ys) if idx & 1 else (ys, xs)
        assert written[idx] == idx + 2
        assert untouched[idx] == 0
def from_numpy(self, arr):
    """Copy the contents of `arr` into this tensor.

    Args:
        arr: Source array whose ``shape`` must have ``self.dim()`` entries,
            each equal to the matching entry of ``self.shape()``. If it
            exposes ``contiguous()``, that is called first and its result
            is copied instead.

    Raises:
        AssertionError: If the shapes disagree.
    """
    assert self.dim() == len(arr.shape)
    own_shape = self.shape()
    for axis in range(self.dim()):
        assert own_shape[axis] == arr.shape[axis]

    from .meta import ext_arr_to_tensor
    import taichi as ti

    source = arr.contiguous() if hasattr(arr, 'contiguous') else arr
    ext_arr_to_tensor(source, self)
    ti.sync()
def test_print_matrix():
    """Print matrix and vector expressions from inside a kernel."""
    x = ti.Matrix(2, 3, dt=ti.f32, shape=())
    y = ti.Vector(3, dt=ti.f32, shape=3)

    @ti.kernel
    def func(k: ti.f32):
        print('hello', x[None], 'world!')
        print(y[2] * k, x[None] / k, y[2])

    scale = 233.3
    func(scale)
    ti.sync()
def from_numpy(self, ndarray):
    """Copy the contents of `ndarray` into this matrix.

    Args:
        ndarray: Source array. With ``d = self.loop_range().dim()``, its
            rank must be ``d + 1`` (treated as a vector, which requires
            ``self.m == 1``) or ``d + 2``.

    Raises:
        AssertionError: If the rank is neither of the two, or if a
            ``d + 1`` array is given for a matrix with ``self.m != 1``.
    """
    array_rank = len(ndarray.shape)
    as_vector = array_rank == self.loop_range().dim() + 1
    if as_vector:
        assert self.m == 1, "This matrix is not a vector"
    else:
        assert array_rank == self.loop_range().dim() + 2

    from .meta import ext_arr_to_matrix
    import taichi as ti

    ext_arr_to_matrix(ndarray, self, as_vector)
    ti.sync()