def benchmark_nested_struct_fill_and_clear():
    a = ti.var(dt=ti.f32)
    N = 512

    @ti.layout
    def place():
        ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

    @ti.kernel
    def fill():
        for i, j in ti.ndrange(N * 8, N * 8):
            a[i, j] = 2.0

    @ti.kernel
    def clear():
        for i, j in a.parent():
            ti.deactivate(a.parent().parent(), [i, j])

    def task():
        fill()
        clear()

    return ti.benchmark(task, repeat=30)

def test_grouped():
    ti.get_runtime().print_preprocessed = True
    val = ti.var(ti.i32)

    n = 4
    m = 8
    p = 16

    @ti.layout
    def values():
        ti.root.dense(ti.i, n).dense(ti.j, m).dense(ti.k, p).place(val)

    @ti.kernel
    def test():
        for I in ti.grouped(val):
            val[I] = I[0] + I[1] * 2 + I[2] * 3

    test()

    for i in range(n):
        for j in range(m):
            for k in range(p):
                assert val[i, j, k] == i + j * 2 + k * 3

def test_io_simple():
    n = 32
    x1 = ti.var(ti.f32, shape=(n, n))
    t1 = torch.tensor(2 * np.ones((n, n), dtype=np.float32))

    x2 = ti.Matrix(2, 3, ti.f32, shape=(n, n))
    t2 = torch.tensor(2 * np.ones((n, n, 2, 3), dtype=np.float32))

    x1.from_torch(t1)
    for i in range(n):
        for j in range(n):
            assert x1[i, j] == 2

    x2.from_torch(t2)
    for i in range(n):
        for j in range(n):
            for k in range(2):
                for l in range(3):
                    assert x2[i, j][k, l] == 2

    t3 = x2.to_torch()
    assert (t2 == t3).all()

def test_atomic_add_with_local_store_simplify2():
    # Test for the following LocalStoreStmt simplification case:
    #
    #   local store [$a <- ...]
    #   atomic add ($a, ...)
    #
    # Specifically, the local store should not be removed, because
    # atomic_add can return its value.
    x = ti.var(ti.i32)
    step = 42

    # n is defined at module scope in the original test file.
    ti.root.dense(ti.i, n).place(x)

    @ti.kernel
    def func():
        for i in range(n):
            j = i
            x[i] = ti.atomic_add(j, step)

    func()
    for i in range(n):
        assert x[i] == i

def benchmark_nested_range_blocked():
    a = ti.var(dt=ti.f32)
    N = 512

    @ti.layout
    def place():
        ti.root.dense(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)

    @ti.kernel
    def fill():
        for X in range(N * N):
            for Y in range(64):
                a[X // N * 8 + Y // 8, X % N * 8 + Y % 8] = 2.0

    fill()  # warm-up run (triggers compilation)
    ti.get_runtime().sync()
    t = time.time()
    for n in range(100):
        fill()
    ti.get_runtime().sync()  # wait for launched kernels before reading the clock
    elapsed = time.time() - t
    return elapsed / 100

def test_loop_arg_as_range():
    # Dynamic range loops are intended to make sure global tmps work
    x = ti.var(ti.i32)
    n = 1000

    @ti.layout
    def layout():
        ti.root.dense(ti.i, n).place(x)

    @ti.kernel
    def test(b: ti.i32, e: ti.i32):
        for i in range(b, e):
            x[i - b] = i

    pairs = [
        (0, n // 2),
        (n // 2, n),
        (-n // 2, -n // 3),
    ]
    for b, e in pairs:
        test(b, e)
        for i in range(b, e):
            assert x[i - b] == i

def test_numpy():
    val = ti.var(ti.i32)
    n = 4

    @ti.layout
    def values():
        ti.root.dense(ti.i, n).place(val)

    # Note: external arrays are annotated with ti.ext_arr() in this API
    # generation (the original line used np.ndarray, which this version of the
    # kernel frontend does not accept).
    @ti.kernel
    def test_numpy(arr: ti.ext_arr()):
        for i in range(n):
            arr[i] = arr[i]**2

    a = np.array([4, 8, 1, 24], dtype=np.float32)

    for i in range(n):
        a[i] = i * 2

    test_numpy(a)

    for i in range(n):
        assert a[i] == i * i * 4

def with_data_type(dt):
    val = ti.var(ti.i32)
    n = 4

    @ti.layout
    def values():
        ti.root.dense(ti.i, n).place(val)

    @ti.kernel
    def test_numpy(arr: ti.ext_arr()):
        for i in range(n):
            arr[i] = arr[i]**2

    a = np.array([4, 8, 1, 24], dtype=dt)

    for i in range(n):
        a[i] = i * 2

    test_numpy(a)

    for i in range(n):
        assert a[i] == i * i * 4

def __init__(
        self,
        nx,  # domain size
        ny,
        niu,  # viscosity of fluid
        bc_type,  # [left, top, right, bottom] boundary conditions: 0 -> Dirichlet; 1 -> Neumann
        bc_value,  # if bc_type == 0, the prescribed velocity on that boundary
        cy=0,  # whether to place a cylindrical obstacle
        cy_para=[0.0, 0.0, 0.0],  # location (x, y) and radius of the cylinder
        steps=60000):  # total steps to run
    self.nx = nx  # by convention, dx = dy = dt = 1.0 (lattice units)
    self.ny = ny
    self.niu = niu
    self.tau = 3.0 * niu + 0.5
    self.inv_tau = 1.0 / self.tau
    self.rho = ti.var(dt=ti.f32, shape=(nx, ny))
    self.vel = ti.Vector(2, dt=ti.f32, shape=(nx, ny))
    self.mask = ti.var(dt=ti.f32, shape=(nx, ny))
    self.f_old = ti.Vector(9, dt=ti.f32, shape=(nx, ny))
    self.f_new = ti.Vector(9, dt=ti.f32, shape=(nx, ny))
    self.w = ti.var(dt=ti.f32, shape=9)
    self.e = ti.var(dt=ti.i32, shape=(9, 2))
    self.bc_type = ti.var(dt=ti.i32, shape=4)
    self.bc_value = ti.var(dt=ti.f32, shape=(4, 2))
    self.cy = cy
    self.cy_para = ti.var(dt=ti.f32, shape=3)
    self.bc_type.from_numpy(np.array(bc_type, dtype=np.int32))
    self.bc_value.from_numpy(np.array(bc_value, dtype=np.float32))
    self.cy_para.from_numpy(np.array(cy_para, dtype=np.float32))
    self.steps = steps

    # D2Q9 lattice weights...
    arr = np.array([
        4.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 36.0,
        1.0 / 36.0, 1.0 / 36.0, 1.0 / 36.0
    ], dtype=np.float32)
    self.w.from_numpy(arr)
    # ...and the corresponding discrete velocity directions
    arr = np.array([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], [1, 1],
                    [-1, 1], [-1, -1], [1, -1]], dtype=np.int32)
    self.e.from_numpy(arr)

def test_argument_error():
    x = ti.var(ti.i32)

    @ti.layout
    def layout():
        ti.root.place(x)

    try:
        @ti.kernel
        def set_i32_notype(v):
            pass
    except ti.KernelDefError:
        pass

    try:
        @ti.kernel
        def set_i32_args(*args):
            pass
    except ti.KernelDefError:
        pass

    try:
        @ti.kernel
        def set_i32_kwargs(**kwargs):
            pass
    except ti.KernelDefError:
        pass

    @ti.kernel
    def set_i32(v: ti.i32):
        x[None] = v

    set_i32(123)
    assert x[None] == 123

def test_local_store_in_nested_for_and_if():
    # See https://github.com/taichi-dev/taichi/pull/862.
    val = ti.var(ti.i32, shape=(3, 3, 3))

    @ti.kernel
    def func():
        ti.serialize()
        for i, j, k in val:
            if i < 2 and j < 2 and k < 2:
                a = 0
                for di, dj, dk in ti.ndrange((0, 2), (0, 2), (0, 2)):
                    if val[i + di, j + dj, k + dk] == 1:
                        a = val[i + di, j + dj, k + dk]
                for di, dj, dk in ti.ndrange((0, 2), (0, 2), (0, 2)):
                    val[i + di, j + dj, k + dk] = a

    val[1, 1, 1] = 1
    func()

    for i in range(3):
        for j in range(3):
            for k in range(3):
                assert val[i, j, k] == 1

def test_numpy_2d_transpose():
    val = ti.var(ti.i32)
    n = 8
    m = 8

    ti.root.dense(ti.ij, (n, m)).place(val)

    @ti.kernel
    def test_numpy(arr: ti.ext_arr()):
        for i in ti.grouped(val):
            val[i] = arr[i]

    a = np.empty(shape=(n, m), dtype=np.int32)

    for i in range(n):
        for j in range(m):
            a[i, j] = i * j + i * 4

    test_numpy(a.transpose())

    for i in range(n):
        for j in range(m):
            assert val[i, j] == i * j + j * 4

def test_pointer2():
    x = ti.var(ti.f32)
    n = 16

    @ti.layout
    def place():
        ti.root.dense(ti.i, n).pointer().dense(ti.i, n).place(x)

    @ti.kernel
    def func():
        for i in range(n * n):
            x[i] = 1.0

    @ti.kernel
    def set10():
        x[10] = 10.0

    @ti.kernel
    def clear():
        for i in x.parent().parent():
            ti.deactivate(x.parent().parent(), i)

    func()
    clear()

    for i in range(n * n):
        assert x[i] == 0.0

    set10()

    for i in range(n * n):
        if i != 10:
            assert x[i] == 0.0
        else:
            assert x[i] == 10.0

def test_scope():
    # In the future, the following code should throw an exception at the
    # Python front end instead of crashing the compiler.
    return

    ti.runtime.print_preprocessed = True
    for arch in [ti.x86_64, ti.cuda]:
        # ti.reset()
        ti.cfg.arch = arch
        x = ti.var(ti.f32)
        N = 1

        @ti.layout
        def place():
            ti.root.dense(ti.i, N).place(x)

        @ti.kernel
        def func():
            if 1 > 0:
                val = 1
            ti.print(val)

        func()

def test_static_grouped_ndrange():
    val = ti.var(ti.i32)
    n = 4
    m = 8

    ti.root.dense(ti.ij, (n, m)).place(val)

    x0 = 2
    y0 = 3
    x1 = 1
    y1 = 6

    @ti.kernel
    def test():
        for I in ti.static(ti.grouped(ti.ndrange((x0, y0), (x1, y1)))):
            val[I] = I[0] + I[1] * 2

    test()

    for i in range(n):
        for j in range(m):
            assert val[i, j] == (i + j * 2
                                 if x0 <= i < y0 and x1 <= j < y1 else 0)

def test_bitmasked_offset_child():
    x = ti.var(ti.i32)
    x2 = ti.var(ti.i32)
    y = ti.var(ti.i32)
    y2 = ti.var(ti.i32)
    y3 = ti.var(ti.i32)
    z = ti.var(ti.i32)
    s = ti.var(ti.i32, shape=())

    n = 16
    # Offset children:
    # * In |bm|'s cell: |bm2| has a non-zero offset
    # * In |bm2|'s cell: |z| has a non-zero offset
    # * We iterate over |z| to test that the listgen handles offsets correctly
    bm = ti.root.bitmasked(ti.i, n)
    bm.dense(ti.i, 16).place(x, x2)
    bm2 = bm.bitmasked(ti.i, 4)
    bm2.dense(ti.i, 4).place(y, y2, y3)
    bm2.bitmasked(ti.i, 4).place(z)

    @ti.kernel
    def func():
        for _ in z:
            s[None] += 1

    z[0] = 1
    z[7] = 1
    z[42] = 1
    z[53] = 1
    z[88] = 1
    z[101] = 1
    z[233] = 1

    func()
    assert s[None] == 7

def __init__(self):
    self.dim = 2
    self.inf = 1e10
    self.epsilon = 1e-5
    self.on = 100
    self.vn = 1000
    self.en = 1000
    self.node = ti.Vector(self.dim, dt=ti.f32, shape=self.vn, needs_grad=True)
    self.prev_node = ti.Vector(self.dim, dt=ti.f32, shape=self.vn)
    self.prev_t_node = ti.Vector(self.dim, dt=ti.f32, shape=self.vn)
    self.bar_node = ti.Vector(self.dim, dt=ti.f32, shape=self.vn)
    self.p = ti.Vector(self.dim, dt=ti.f32, shape=self.vn)
    self.element = ti.Vector(self.dim + 1, dt=ti.i32, shape=self.en)

    # the end index of object i's vertices/elements
    self.vn_object_index = ti.var(dt=ti.i32, shape=self.on)
    self.en_object_index = ti.var(dt=ti.i32, shape=self.on)
    self.count = ti.var(dt=ti.i32, shape=())

    # the inverse mapping: the object id of each node and element
    self.node_obj_idx = ti.var(dt=ti.i32, shape=self.vn)
    self.element_obj_idx = ti.var(dt=ti.i32, shape=self.en)

    ## for simulation
    self.E = 6000  # Young's modulus
    self.nu = 0.4  # Poisson's ratio: nu in [0, 0.5)
    self.mu = self.E / (2 * (1 + self.nu))
    self.la = self.E * self.nu / ((1 + self.nu) * (1 - 2 * self.nu))
    self.dt = 5e-3
    self.bar_d = 0.1
    self.k = 1  # contact stiffness
    # self.velocity = ti.Vector(self.dim, dt=ti.f32, shape=self.vn)
    self.node_mass = ti.var(dt=ti.f32, shape=self.vn)
    self.element_mass = ti.var(dt=ti.f32, shape=self.en)
    self.element_volume = ti.var(dt=ti.f32, shape=self.en)
    self.energy = ti.var(dt=ti.f32, shape=(), needs_grad=True)
    self.prev_energy = ti.var(dt=ti.f32, shape=())
    self.B = ti.Matrix(self.dim, self.dim, dt=ti.f32, shape=self.en)
    self.neighbor_element_count = ti.var(dt=ti.i32, shape=self.vn)

    ## for rendering
    self.begin_point = ti.Vector(self.dim, ti.f32, shape=(self.en * 3))
    self.end_point = ti.Vector(self.dim, ti.f32, shape=(self.en * 3))
    self.node_energy = ti.var(dt=ti.f32, shape=self.vn)
    self.edge_energy = ti.var(dt=ti.f32, shape=(self.en * 3))
    self.score = 0
    self.rendering_u0 = ti.var(dt=ti.f32, shape=())
    self.rendering_u1 = ti.var(dt=ti.f32, shape=())
    self.rendering_u2 = ti.var(dt=ti.f32, shape=())
    self.rendering_u3 = ti.var(dt=ti.f32, shape=())
    self.game_over = ti.var(dt=ti.i32, shape=())

    ## the controlled object
    self.ctrl_obj = ti.var(dt=ti.i32, shape=())
    self.move_d = ti.var(dt=ti.f32, shape=())
    self.ctrl_obj[None] = -1
    self.move_d[None] = 1e-3

import taichi as ti

quality = 2  # Use a larger value for higher-res simulations
n_particles, n_grid = 9000 * quality**2, 128 * quality
dx, inv_dx = 1 / n_grid, float(n_grid)
dt = 1e-4 / quality
p_vol, p_rho = (dx * 0.5)**2, 1
p_mass = p_vol * p_rho
E, nu = 0.1e4, 0.2  # Young's modulus and Poisson's ratio
mu_0, lambda_0 = E / (2 * (1 + nu)), E * nu / ((1 + nu) * (1 - 2 * nu))  # Lame parameters

x = ti.Vector(2, dt=ti.f32, shape=n_particles)  # position
v = ti.Vector(2, dt=ti.f32, shape=n_particles)  # velocity
C = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles)  # affine velocity field
F = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles)  # deformation gradient
material = ti.var(dt=ti.i32, shape=n_particles)  # material id
Jp = ti.var(dt=ti.f32, shape=n_particles)  # plastic deformation
grid_v = ti.Vector(2, dt=ti.f32, shape=(n_grid, n_grid))  # grid node momentum/velocity
grid_m = ti.var(dt=ti.f32, shape=(n_grid, n_grid))  # grid node mass
ti.cfg.arch = ti.cuda  # Try to run on GPU

@ti.kernel
def substep():
    for i, j in ti.ndrange(n_grid, n_grid):
        grid_v[i, j] = [0, 0]
        grid_m[i, j] = 0
    for p in range(n_particles):  # Particle state update and scatter to grid (P2G)
        base = (x[p] * inv_dx - 0.5).cast(int)
        fx = x[p] * inv_dx - base.cast(float)
        # Quadratic kernels [http://mpm.graphics Eqn. 123, with x=fx, fx-1, fx-2]
        w = [0.5 * ti.sqr(1.5 - fx), 0.75 - ti.sqr(fx - 1), 0.5 * ti.sqr(fx - 0.5)]
        F[p] = (ti.Matrix.identity(ti.f32, 2) + dt * C[p]) @ F[p]  # deformation gradient update
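        # (The excerpt stops here. In the full mpm example, the kernel goes on
        # to compute the stress-based affine contribution, scatter mass and
        # momentum to the grid, normalize and apply boundary conditions on
        # grid nodes, and gather back to particles (G2P).)

# Minimal driver sketch (assumed, not part of the original file): once the
# kernel is complete and particles are initialized, each frame advances the
# simulation by a number of substeps.
for _ in range(50):
    substep()
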
def on_init(self, n=512):
    self.n = n
    self.title = 'Julia Set'
    self.img = ti.var(ti.f32, (self.n * 2, self.n))
    self.colormap = cm.get_cmap('magma')
    self.define_input()

import taichi as ti

ti.init()

x = ti.var(ti.i32)
y = ti.var(ti.i32)

ti.root.pointer(ti.ij, 4).dense(ti.ij, 8).place(x, y)

@ti.kernel
def copy():
    for i, j in y:  # the struct-for visits only active cells of y
        x[i, j] = y[i, j]

copy()
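
# A small demonstration sketch (assumed, not in the original snippet): writing
# to y from the Python scope activates the enclosing pointer block, so the
# struct-for in copy() then visits exactly those activated cells.
y[2, 3] = 42
copy()
print(x[2, 3])  # 42
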
import time

from matplotlib.pyplot import cm
import taichi as ti

real = ti.f32
ti.set_default_fp(real)

max_steps = 4096
vis_interval = 4
output_vis_interval = 16
steps = 204
assert steps * 2 <= max_steps

vis_resolution = 1024

scalar = lambda: ti.var(dt=real)
vec = lambda: ti.Vector(2, dt=real)

loss = scalar()
x = vec()
v = vec()

goal = [0.9, 0.15]

n_objects = 1
ground_height = 0.1

@ti.layout
def place():
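    # Assumed body (the snippet is truncated here). Layouts in the
    # difftaichi-style examples this resembles typically place time-major
    # state tensors plus the loss and then allocate gradients lazily:
    ti.root.dense(ti.i, max_steps).dense(ti.j, n_objects).place(x, v)
    ti.root.place(loss)
    ti.root.lazy_grad()
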
import taichi as ti

ti.init(arch=ti.cpu)

n = 320
pixels = ti.var(dt=ti.f32, shape=(n * 2, n))

@ti.func
def complex_sqr(z):
    return ti.Vector([z[0]**2 - z[1]**2, z[1] * z[0] * 2])

@ti.kernel
def paint(t: ti.f32):
    for i, j in pixels:  # parallelized over all pixels
        c = ti.Vector([-0.8, ti.sin(t) * 0.2])
        z = ti.Vector([float(i) / n - 1, float(j) / n - 0.5]) * 2
        iterations = 0
        while z.norm() < 20 and iterations < 50:
            z = complex_sqr(z) + c
            iterations += 1
        pixels[i, j] = 1 - iterations * 0.02

gui = ti.GUI("Fractal", (n * 2, n))

for i in range(1000000):
    paint(i * 0.03)
    gui.set_image(pixels)
    gui.show()

import taichi as ti

dim = 2
n_particles = 8192
n_grid = 128
dx = 1 / n_grid
inv_dx = 1 / dx
dt = 2.0e-4
p_vol = (dx * 0.5)**2
p_rho = 1
p_mass = p_vol * p_rho
E = 400

x = ti.Vector(dim, dt=ti.f32, shape=n_particles)
v = ti.Vector(dim, dt=ti.f32, shape=n_particles)
C = ti.Matrix(dim, dim, dt=ti.f32, shape=n_particles)
J = ti.var(dt=ti.f32, shape=n_particles)
grid_v = ti.Vector(dim, dt=ti.f32, shape=(n_grid, n_grid))
grid_m = ti.var(dt=ti.f32, shape=(n_grid, n_grid))

ti.cfg.arch = ti.cuda

@ti.kernel
def substep():
    for p in x:
        base = (x[p] * inv_dx - 0.5).cast(int)
        fx = x[p] * inv_dx - base.cast(float)
        w = [
            0.5 * ti.sqr(1.5 - fx), 0.75 - ti.sqr(fx - 1),
            0.5 * ti.sqr(fx - 0.5)
        ]
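        # Assumed continuation (the snippet is truncated here). In the
        # canonical mpm88 example, the P2G scatter proceeds roughly like this:
        stress = -dt * p_vol * (J[p] - 1) * 4 * inv_dx * inv_dx * E
        affine = ti.Matrix([[stress, 0], [0, stress]]) + p_mass * C[p]
        for i in ti.static(range(3)):
            for j in ti.static(range(3)):
                offset = ti.Vector([i, j])
                dpos = (offset.cast(float) - fx) * dx
                weight = w[i][0] * w[j][1]
                grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
                grid_m[base + offset] += weight * p_mass
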
import taichi as ti
import taichi_glsl as ts
import taichi_three as t3

ti.init(ti.opengl, kernel_profiler=True)

scene = t3.Scene()
model = t3.Model()
scene.add_model(model)

N = 2**12
faces = t3.Face.var()
vertices = t3.Vertex.var()
ti.root.dense(ti.i, N * 3).place(vertices)
ti.root.dense(ti.i, N).place(faces)
vertices_len = ti.var(ti.i32, ())
faces_len = ti.var(ti.i32, ())
model.set_vertices(vertices)
model.add_geometry(faces)

@ti.func
def glVertex(pos):
    l = ti.atomic_add(vertices_len[None], 1)
    vertices.pos[l] = pos
    return l

@ti.func
def glFace(idx):
    l = ti.atomic_add(faces_len[None], 1)
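    # Assumed completion (the original is truncated here), mirroring glVertex
    # above: store the vertex indices of the new face at the reserved slot.
    # The field name `idx` is hypothetical and depends on taichi_three's Face.
    faces.idx[l] = idx
    return l
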
import taichi as ti
import random

n = 8
x = ti.var(dt=ti.f32)
y = ti.var(dt=ti.f32)
L = ti.var(dt=ti.f32)

@ti.layout
def data():
    ti.root.dense(ti.i, n).place(x, y, x.grad, y.grad)  # place gradient tensors
    ti.root.place(L, L.grad)

@ti.kernel
def reduce():
    for i in range(n):
        ti.atomic_add(L, 0.5 * (x[i] - y[i])**2)

# Initialize vectors
for i in range(n):
    x[i] = random.random()
    y[i] = random.random()

@ti.kernel
def update():
    for i in x:
        x[i] -= x.grad[i] * 0.1

# Optimize with 100 gradient descent iterations
for k in range(100):
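    # Assumed loop body (the snippet is truncated here): ti.Tape records
    # reduce() to evaluate L and its gradients, after which update() takes
    # one gradient descent step on x.
    with ti.Tape(loss=L):
        reduce()
    update()
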
# (Fragment: h, dim, num_particles, and grid_size are defined earlier in the
# original file.)
lambda_epsilon = 100.0
pbf_num_iters = 5
corr_deltaQ_coeff = 0.3
corrK = 0.001
# Need ti.pow()
# corrN = 4.0
neighbor_radius = h * 1.05

poly6_factor = 315.0 / 64.0 / np.pi
spiky_grad_factor = -45.0 / np.pi

old_positions = ti.Vector(dim, dt=ti.f32)
positions = ti.Vector(dim, dt=ti.f32)
velocities = ti.Vector(dim, dt=ti.f32)
# Once taichi supports clear(), we can get rid of grid_num_particles
grid_num_particles = ti.var(ti.i32)
grid2particles = ti.var(ti.i32)
particle_num_neighbors = ti.var(ti.i32)
particle_neighbors = ti.var(ti.i32)
lambdas = ti.var(ti.f32)
position_deltas = ti.Vector(dim, dt=ti.f32)
# 0: x-pos, 1: timestep in sin()
board_states = ti.Vector(2, dt=ti.f32)

@ti.layout
def layout():
    ti.root.dense(ti.i, num_particles).place(old_positions, positions,
                                             velocities)
    grid_snode = ti.root.dense(ti.ij, grid_size)
    grid_snode.place(grid_num_particles)
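    # Assumed continuation (the layout is truncated here). The remaining
    # placements in a PBF-style example would look roughly like this;
    # max_num_particles_per_cell and max_num_neighbors are constants assumed
    # to be defined earlier in the full file:
    grid_snode.dense(ti.k, max_num_particles_per_cell).place(grid2particles)
    nb_node = ti.root.dense(ti.i, num_particles)
    nb_node.place(particle_num_neighbors)
    nb_node.dense(ti.j, max_num_neighbors).place(particle_neighbors)
    ti.root.dense(ti.i, num_particles).place(lambdas, position_deltas)
    ti.root.place(board_states)
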
import taichi as ti

ti.init()

a = ti.var(dt=ti.f32, shape=(42, 63))
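
# Minimal usage sketch (assumed, not part of the original snippet): tensor
# elements are zero-initialized and can be read and written from Python scope.
a[3, 4] = 1.0
print(a[3, 4])  # 1.0
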
import os

import cv2
import taichi as ti

real = ti.f32
ti.set_default_fp(real)
# ti.cfg.print_ir = True

max_steps = 4096
vis_interval = 256
output_vis_interval = 8
steps = 2048 // 2
assert steps * 2 <= max_steps

vis_resolution = 1024

scalar = lambda: ti.var(dt=real)
vec = lambda: ti.Vector(2, dt=real)

loss = scalar()

x = vec()
v = vec()
v_inc = vec()

head_id = 10
goal = vec()

n_objects = 0
# target_ball = 0
elasticity = 0.0
ground_height = 0.1
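
# Assumed continuation sketch (the original file goes on to define the data
# layout and simulation kernels); a difftaichi-style placement might look like:
@ti.layout
def place():
    ti.root.dense(ti.i, max_steps).dense(ti.j, n_objects).place(x, v, v_inc)
    ti.root.place(loss, goal)
    ti.root.lazy_grad()
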
def __init__(self, n, m):
    self.n = n
    self.m = m
    self.val = ti.var(ti.f32, shape=(n, m))

def __init__(self, n, m, increment):
    self.n = n
    self.m = m
    self.val = ti.var(ti.f32)
    self.total = ti.var(ti.f32)
    self.increment = increment
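    # Note: val and total are declared without shapes above, so the original
    # class presumably places them via a layout; an assumed sketch:
    @ti.layout
    def place():
        ti.root.dense(ti.ij, (self.n, self.m)).place(self.val)
        ti.root.place(self.total)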