def test_oop():
    @ti.data_oriented
    class Array2D:
        def __init__(self, n, m, increment):
            self.n = n
            self.m = m
            self.val = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.increment = increment
            ti.root.dense(ti.ij, (self.n, self.m)).place(self.val)
            ti.root.place(self.total)

        @ti.kernel
        def inc(self):
            for i, j in self.val:
                self.val[i, j] += self.increment

        @ti.kernel
        def inc2(self, increment: ti.i32):
            for i, j in self.val:
                self.val[i, j] += increment

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                self.total[None] += self.val[i, j] * 4

    arr = Array2D(128, 128, 3)

    double_total = ti.field(ti.f32)
    ti.root.place(double_total)
    ti.root.lazy_grad()

    arr.inc()
    arr.inc.grad()
    assert arr.val[3, 4] == 3
    arr.inc2(4)
    assert arr.val[3, 4] == 7

    with ti.Tape(loss=arr.total):
        arr.reduce()
    for i in range(arr.n):
        for j in range(arr.m):
            assert arr.val.grad[i, j] == 4

    @ti.kernel
    def double():
        double_total[None] = 2 * arr.total[None]

    with ti.Tape(loss=double_total):
        arr.reduce()
        double()
    for i in range(arr.n):
        for j in range(arr.m):
            assert arr.val.grad[i, j] == 8
def test_oop_two_items():
    @ti.data_oriented
    class Array2D:
        def __init__(self, n, m, increment, multiplier):
            self.n = n
            self.m = m
            self.val = ti.var(ti.f32)
            self.total = ti.var(ti.f32)
            self.increment = increment
            self.multiplier = multiplier

        def place(self, root):
            root.dense(ti.ij, (self.n, self.m)).place(self.val)
            root.place(self.total)

        @ti.kernel
        def inc(self):
            for i, j in self.val:
                ti.atomic_add(self.val[i, j], self.increment)

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                ti.atomic_add(self.total, self.val[i, j] * self.multiplier)

    arr1_inc, arr1_mult = 3, 4
    arr2_inc, arr2_mult = 6, 8
    arr1 = Array2D(128, 128, arr1_inc, arr1_mult)
    arr2 = Array2D(16, 32, arr2_inc, arr2_mult)

    @ti.layout
    def place():
        # Place an object. Make sure a `place` method is defined for that object.
        ti.root.place(arr1)
        ti.root.place(arr2)
        ti.root.lazy_grad()

    arr1.inc()
    arr1.inc.grad()
    arr2.inc()
    arr2.inc.grad()
    assert arr1.val[3, 4] == arr1_inc
    assert arr2.val[8, 6] == arr2_inc

    with ti.Tape(loss=arr1.total):
        arr1.reduce()
    with ti.Tape(loss=arr2.total, clear_gradients=False):
        arr2.reduce()
    for i in range(arr1.n):
        for j in range(arr1.m):
            assert arr1.val.grad[i, j] == arr1_mult
    for i in range(arr2.n):
        for j in range(arr2.m):
            assert arr2.val.grad[i, j] == arr2_mult
def test_stop_grad2():
    x = ti.var(ti.f32)
    loss = ti.var(ti.f32)
    n = 128

    @ti.layout
    def place():
        ti.root.dense(ti.i, n).place(x)
        ti.root.place(loss)
        ti.root.lazy_grad()

    @ti.kernel
    def func():
        # Two loops: one with stop_grad, one without.
        for i in range(n):
            ti.stop_grad(x)
            ti.atomic_add(loss, x[i]**2)
        for i in range(n):
            ti.atomic_add(loss, x[i]**2)

    for i in range(n):
        x[i] = i

    with ti.Tape(loss):
        func()

    # Without stop_grad, x.grad[i] would be i * 4.
    for i in range(n):
        assert x.grad[i] == i * 2
def main():
    target_img = cv2.resize(cv2.imread('taichi.png'),
                            (n_grid, n_grid))[:, :, 0] / 255.0
    for i in range(n_grid):
        for j in range(n_grid):
            target[i, j] = target_img[i, j]
            smoke[0, i, j] = (i // 16 + j // 16) % 2

    for opt in range(num_iterations):
        with ti.Tape(loss):
            output = "outputs/opt{:03d}".format(opt) if opt % 10 == 0 else None
            forward(output)

        velocity_field = np.ones(shape=(n_grid, n_grid, 3), dtype=np.float32)
        for i in range(n_grid):
            for j in range(n_grid):
                s = 0.2
                b = 0.5
                velocity_field[i, j, 0] = v[0, i, j][0] * s + b
                velocity_field[i, j, 1] = v[0, i, j][1] * s + b
        cv2.imshow('velocity', velocity_field)
        cv2.waitKey(1)

        print('Iter', opt, ' Loss =', loss[None])
        apply_grad()

    forward("output")
def main():
    # initialization
    target_img = cv2.imread('taichi.png')[:, :, 0] / 255.0
    target_img -= target_img.mean()
    target_img = cv2.resize(target_img, (n_grid, n_grid))
    cv2.imshow('target', target_img * amplify + 0.5)
    # print(target_img.min(), target_img.max())
    for i in range(n_grid):
        for j in range(n_grid):
            target[i, j] = float(target_img[i, j])

    if False:
        # this is not too exciting...
        initial[n_grid // 2, n_grid // 2] = -2
        forward('center')
        initial[n_grid // 2, n_grid // 2] = 0

    for opt in range(200):
        with ti.Tape(loss):
            output = None
            if opt % 20 == 19:
                output = 'wave/iter{:03d}/'.format(opt)
            forward(output)

        print('Iter', opt, ' Loss =', loss[None])
        apply_grad()

    forward('optimized')
def test_oop_inherit_ok():
    # Array1D inherits from `object`, so the call stack shows
    # 'class Array1D(object)' instead of '@ti.data_oriented'.
    # Make sure this also works.
    @ti.data_oriented
    class Array1D(object):
        def __init__(self, n, mul):
            self.n = n
            self.val = ti.var(ti.f32)
            self.total = ti.var(ti.f32)
            self.mul = mul

        def place(self, root):
            root.dense(ti.ij, (self.n, )).place(self.val)
            root.place(self.total)

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                ti.atomic_add(self.total, self.val[i, j] * self.mul)

    arr = Array1D(128, 42)

    @ti.layout
    def place():
        # Place an object. Make sure a `place` method is defined for that object.
        ti.root.place(arr)
        ti.root.lazy_grad()

    with ti.Tape(loss=arr.total):
        arr.reduce()
    # The (self.n, ) shape is broadcast over ti.ij, so val is n-by-n.
    for i in range(arr.n):
        for j in range(arr.n):
            assert arr.val.grad[i, j] == 42
def test_decorated_primal_missing_decorator():
    x = ti.field(ti.f32)
    total = ti.field(ti.f32)
    n = 128
    ti.root.dense(ti.i, n).place(x)
    ti.root.place(total)
    ti.root.lazy_grad()

    @ti.kernel
    def func(mul: ti.f32):
        for i in range(n):
            ti.atomic_add(total[None], x[i] * mul)

    def forward(mul):
        func(mul)
        func(mul)

    with pytest.raises(RuntimeError):

        @ti.ad.grad_for(func)
        def backward(mul):
            func.grad(mul)

        with ti.Tape(loss=total):
            func(4)
def main():
    print("Loading initial and target states...")
    initial_smoke_img = cv2.imread("init_smoke.png")[:, :, 0] / 255.0
    target_img = cv2.resize(cv2.imread('taichi.png'),
                            (n_grid, n_grid))[:, :, 0] / 255.0

    for i in range(n_grid):
        for j in range(n_grid):
            target[i, j] = target_img[i, j]
            smoke[0, i, j] = initial_smoke_img[i, j]

    for opt in range(num_iterations):
        t = time.time()
        with ti.Tape(loss):
            # opt % 10 == -1 is never true, so frame dumping stays disabled
            output = "test" if opt % 10 == -1 else None
            forward(output)
        print('total time', (time.time() - t) * 1000, 'ms')

        print('Iter', opt, ' Loss =', loss[None])
        apply_grad()

    print("Compilation time:",
          ti.get_runtime().prog.get_total_compilation_time())
    # ti.profiler_print()
    forward("output")
def substep():
    with ti.Tape(U):
        # Every kernel invocation within this indented scope will also be
        # accounted into the partial derivative of U with respect to the
        # corresponding input variables, such as x.
        compute_U()  # also computes dU/dx and saves it in x.grad
    advance()
def test_kernel_template_gradient():
    x = ti.field(ti.f32)
    y = ti.field(ti.f32)
    z = ti.field(ti.f32)
    loss = ti.field(ti.f32)

    ti.root.dense(ti.i, 16).place(x, y, z)
    ti.root.place(loss)
    ti.root.lazy_grad()

    @ti.kernel
    def double(a: ti.template(), b: ti.template()):
        for i in range(16):
            b[i] = a[i] * 2 + 1

    @ti.kernel
    def compute_loss():
        for i in range(16):
            ti.atomic_add(loss[None], z[i])

    for i in range(16):
        x[i] = i

    with ti.Tape(loss):
        double(x, y)
        double(y, z)
        compute_loss()

    for i in range(16):
        assert z[i] == i * 4 + 3
        assert x.grad[i] == 4
def task():
    for i in range(10):
        with ti.Tape(loss=loss):
            # The forward kernel of compute_loss should be completely
            # eliminated (except for the last one).
            compute_loss()
        accumulate_grad()
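# For context, task() above assumes a scalar `loss` plus `compute_loss` and
# `accumulate_grad` defined elsewhere. A minimal sketch of what those could
# look like (the field names and shapes here are assumptions, not the
# original code):


import taichi as ti

ti.init(arch=ti.cpu)

x = ti.field(ti.f32, shape=16, needs_grad=True)
loss = ti.field(ti.f32, shape=(), needs_grad=True)
x_grad_acc = ti.field(ti.f32, shape=16)  # running sum of per-iteration gradients


@ti.kernel
def compute_loss():
    for i in x:
        loss[None] += x[i]**2


@ti.kernel
def accumulate_grad():
    for i in x:
        x_grad_acc[i] += x.grad[i]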
def main():
    # set initial value of the x component of velocity for the container
    init_v[None] = 0.1

    # INITIALIZE SCENE
    scene = Scene()
    initialize_env(scene)
    scene.finalize()  # wrap up initialization by re-updating global scene params
    clear_physical_params(scene)  # clear physical parameters for a new simulation

    losses = []
    init_v.grad[None] = 1

    for n in range(num_iters):
        # `output` defines the name of the directory where files are saved
        stuck.fill(0)
        ti.clear_all_gradients()
        if n % 1 == 0:  # always true; every iteration is dumped
            output = 'obst_figs/iter{:04d}'.format(n)
        else:
            output = None

        with ti.Tape(loss):
            forward(output, visualize=False)

        print('Iter =', n, 'Loss =', loss[None], ' ')
        init_v[None] -= learning_rate * init_v.grad[None]
        print('init_v =', init_v[None], ' ')
        print('init_v.grad =', init_v.grad[None], ' ')
def optimize():
    initialize()
    forward(visualize=True, output='initial')

    losses = []
    for iter in range(200000):
        initialize()
        vis = iter % 200 == 0
        output = None
        if vis:
            output = 'iter{:05d}'.format(iter)
        with ti.Tape(loss):
            forward(visualize=vis, output=output)
        losses.append(loss[None])
        # print(iter, "loss", loss[None])
        if vis:
            print(iter, sum(losses))
            losses.clear()

        tot = 0
        for i in range(8):
            for j in range(n_hidden):
                weight1[i, j] = weight1[i, j] - weight1.grad[i, j] * learning_rate
                tot += weight1.grad[i, j]**2
        # print(tot)
        for j in range(n_hidden):
            bias1[j] = bias1[j] - bias1.grad[j] * learning_rate

        for i in range(n_hidden):
            for j in range(n_gravitation):
                weight2[i, j] = weight2[i, j] - weight2.grad[i, j] * learning_rate
        for j in range(n_gravitation):
            bias2[j] = bias2[j] - bias2.grad[j] * learning_rate

    forward(visualize=True, output='final')
def main():
    """
    The differentiable programming framework is documented at
    https://taichi.readthedocs.io/en/latest/syntax.html#kernels
    """
    # read in the figure
    img = cv2.imread('erythrocyte.png')[:, :, 0]
    # normalization
    img = img / 255.0
    img -= img.mean()
    img = cv2.resize(img, (n, n))

    for i in range(n):
        for j in range(n):
            u_hat[i, j] = float(img[i, j])

    losses = []
    for it in range(100):
        # encapsulate the loss computation in a Taichi tape
        with ti.Tape(loss):
            forward()
        print('Iter {} Loss = {}'.format(it, loss[None]))
        losses.append(loss[None])
        # apply the gradient update
        apply_grad()

    # output the loss curve
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.plot(losses)
    plt.show()
def test_ad_frac():
    @ti.func
    def frac(x):
        fractional = x - ti.floor(x) if x > 0. else x - ti.ceil(x)
        return fractional

    @ti.kernel
    def ti_frac(input_field: ti.template(), output_field: ti.template()):
        for i in input_field:
            output_field[i] = frac(input_field[i])**2

    @ti.kernel
    def calc_loss(input_field: ti.template(), loss: ti.template()):
        for i in input_field:
            loss[None] += input_field[i]

    n = 10
    field0 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True)
    randoms = np.random.randn(10).astype(np.float32)
    field0.from_numpy(randoms)
    field1 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True)
    loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)

    with ti.Tape(loss):
        ti_frac(field0, field1)
        calc_loss(field1, loss)

    grads = field0.grad.to_numpy()
    expected = np.modf(randoms)[0] * 2
    for i in range(n):
        assert grads[i] == test_utils.approx(expected[i], rel=1e-4)
def substep():
    with ti.Tape(U):
        # Kernel invocations in this scope contribute to the partial
        # derivatives of U with respect to input variables such as x.
        compute_U()  # The tape automatically computes dU/dx and saves it in x.grad
    advance()
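# A self-contained version of the pattern above, in the spirit of the Taichi
# autodiff docs; the pairwise-potential kernel is illustrative, not the
# original simulation:


import taichi as ti

ti.init(arch=ti.cpu)

N = 8
dt = 1e-5
x = ti.Vector.field(2, dtype=ti.f32, shape=N, needs_grad=True)  # positions
v = ti.Vector.field(2, dtype=ti.f32, shape=N)  # velocities
U = ti.field(dtype=ti.f32, shape=(), needs_grad=True)  # potential energy


@ti.kernel
def compute_U():
    for i, j in ti.ndrange(N, N):
        r = x[i] - x[j]
        # r.norm(1e-3) guards against 1/0, which would also break the derivative
        U[None] += -1 / r.norm(1e-3)


@ti.kernel
def advance():
    for i in x:
        v[i] += dt * -x.grad[i]  # dv/dt = -dU/dx
    for i in x:
        x[i] += dt * v[i]  # dx/dt = v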
def test_complex_kernels():
    x = ti.var(ti.f32)
    total = ti.var(ti.f32)
    n = 128

    @ti.layout
    def place():
        ti.root.dense(ti.i, n).place(x)
        ti.root.place(total)
        ti.root.lazy_grad()

    @ti.kernel
    def func(mul: ti.f32):
        for i in range(n):
            ti.atomic_add(total[None], x[i] * mul)

    @ti.complex_kernel
    def forward(mul):
        func(mul)
        func(mul)

    @ti.complex_kernel_grad(forward)
    def backward(mul):
        func.grad(mul)

    with ti.Tape(loss=total):
        forward(4)
    for i in range(n):
        assert x.grad[0] == 4
def test_complex_kernels_oop():
    @ti.data_oriented
    class A:
        def __init__(self):
            self.x = ti.var(ti.f32)
            self.total = ti.var(ti.f32)
            self.n = 128
            ti.root.dense(ti.i, self.n).place(self.x)
            ti.root.place(self.total)

        @ti.kernel
        def func(self, mul: ti.f32):
            for i in range(self.n):
                ti.atomic_add(self.total[None], self.x[i] * mul)

        @ti.complex_kernel
        def forward(self, mul):
            self.func(mul)
            self.func(mul)

        @ti.complex_kernel_grad(forward)
        def backward(self, mul):
            self.func.grad(mul)

    a = A()
    ti.root.lazy_grad()

    with ti.Tape(loss=a.total):
        a.forward(4)
    for i in range(a.n):
        assert a.x.grad[0] == 4
def test_multiple_ib_inner_mixed():
    x = ti.field(float, (), needs_grad=True)
    y = ti.field(float, (), needs_grad=True)

    @ti.kernel
    def compute_y():
        for j in range(2):
            for i in range(3):
                y[None] += x[None]
            for i in range(3):
                for ii in range(2):
                    y[None] += x[None]
                for iii in range(2):
                    y[None] += x[None]
                    for iiii in range(2):
                        y[None] += x[None]
            for i in range(3):
                for ii in range(2):
                    for iii in range(2):
                        y[None] += x[None]

    x[None] = 1.0
    with ti.Tape(y):
        compute_y()

    # Per j: 3 + 3 * (2 + 2 * (1 + 2)) + 3 * 2 * 2 = 39; two j iterations give 78.
    assert y[None] == 78.0
    assert x.grad[None] == 78.0
def main():
    initialize()
    vertices_ = vertices.to_numpy()

    while gui.running and not gui.get_event(gui.ESCAPE):
        for s in range(int(1e-2 // dt)):
            grid_m.fill(0)
            grid_v.fill(0)
            # Note that we are now differentiating the total energy w.r.t.
            # the particle position.
            # Recall that F = -\partial (total_energy) / \partial x.
            with ti.Tape(total_energy):
                # Do the forward computation of the total energy and the
                # backward propagation for x.grad, which is later used in p2g.
                compute_total_energy()
                # It's OK not to use the computed total_energy at all,
                # since we only need x.grad.
            p2g()
            grid_op()
            g2p()

        gui.circle((0.5, 0.5), radius=45, color=0x068587)
        particle_pos = x.to_numpy()
        a = vertices_.reshape(n_elements * 3)
        b = np.roll(vertices_, shift=1, axis=1).reshape(n_elements * 3)
        gui.lines(particle_pos[a], particle_pos[b], radius=1, color=0x4FB99F)
        gui.circles(particle_pos, radius=1.5, color=0xF2B134)
        gui.line((0.00, 0.03 / quality), (1.0, 0.03 / quality),
                 color=0xFFFFFF,
                 radius=3)
        gui.show()

    ti.kernel_profiler_print()
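# To spell out the relation the comments above rely on: for a hyperelastic
# body with total potential energy U(x), the elastic force on particle i is
#
#     f_i = -\frac{\partial U}{\partial \mathbf{x}_i}
#
# so after the tape's backward pass fills x.grad[i] with dU/dx_i, p2g can
# scatter the negated gradient to the grid as that particle's force
# contribution (typically as an impulse, -dt * x.grad[i]).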
def test_customized_kernels_oop2():
    @ti.data_oriented
    class A:
        def __init__(self):
            self.x = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.n = 128
            ti.root.dense(ti.i, self.n).place(self.x)
            ti.root.place(self.total)

        @ti.kernel
        def func(self, mul: ti.f32):
            for i in range(self.n):
                ti.atomic_add(self.total[None], self.x[i] * mul)

        def func_proxy(self, mul):
            self.func(mul)

        @ti.ad.grad_replaced
        def forward(self, mul):
            self.func_proxy(mul)
            self.func_proxy(mul)

        @ti.ad.grad_for(forward)
        def backward(self, mul):
            self.func.grad(mul)

    a = A()
    ti.root.lazy_grad()

    with ti.Tape(loss=a.total):
        a.forward(4)
    assert a.x.grad[0] == 4
def main():
    init_mesh()
    init_pos()
    gravity[None] = [0, -1]

    print("[Hint] Use WSAD/arrow keys to control gravity. "
          "Use left/right mouse buttons to attract/repel. Press R to reset.")

    while window.running:
        for e in window.get_events(ti.ui.PRESS):
            if e.key == ti.ui.ESCAPE:
                window.running = False
            elif e.key == 'r':
                init_pos()
            elif e.key in ('a', ti.ui.LEFT):
                gravity[None] = [-1, 0]
            elif e.key in ('d', ti.ui.RIGHT):
                gravity[None] = [+1, 0]
            elif e.key in ('s', ti.ui.DOWN):
                gravity[None] = [0, -1]
            elif e.key in ('w', ti.ui.UP):
                gravity[None] = [0, +1]

        mouse_pos = window.get_cursor_pos()
        attractor_pos[None] = mouse_pos
        attractor_strength[None] = window.is_pressed(
            ti.ui.LMB) - window.is_pressed(ti.ui.RMB)

        for i in range(50):
            with ti.Tape(loss=U):
                update_U()
            advance()

        render()
        window.show()
def test_customized_kernels_tape():
    x = ti.field(ti.f32)
    total = ti.field(ti.f32)
    n = 128
    ti.root.dense(ti.i, n).place(x)
    ti.root.place(total)
    ti.root.lazy_grad()

    @ti.kernel
    def func(mul: ti.f32):
        for i in range(n):
            ti.atomic_add(total[None], x[i] * mul)

    @ti.ad.grad_replaced
    def forward(mul):
        func(mul)
        func(mul)

    @ti.ad.grad_for(forward)
    def backward(mul):
        func.grad(mul)

    with ti.Tape(loss=total):
        forward(4)
    assert x.grad[0] == 4
def test_stop_grad2():
    x = ti.field(ti.f32)
    loss = ti.field(ti.f32)
    n = 128
    ti.root.dense(ti.i, n).place(x)
    ti.root.place(loss)
    ti.root.lazy_grad()

    @ti.kernel
    def func():
        # Two loops: one with stop_grad, one without.
        for i in range(n):
            ti.stop_grad(x)
            loss[None] += x[i]**2
        for i in range(n):
            loss[None] += x[i]**2

    for i in range(n):
        x[i] = i

    with ti.Tape(loss):
        func()

    # Without stop_grad, x.grad[i] would be i * 4.
    for i in range(n):
        assert x.grad[i] == i * 2
def test_normal_grad():
    x = ti.var(ti.f32)
    loss = ti.var(ti.f32)
    n = 128

    @ti.layout
    def place():
        ti.root.dense(ti.i, n).place(x)
        ti.root.place(loss)
        ti.root.lazy_grad()

    @ti.kernel
    def func():
        for i in range(n):
            ti.atomic_add(loss, x[i]**2)

    for i in range(n):
        x[i] = i

    with ti.Tape(loss):
        func()

    for i in range(n):
        assert x.grad[i] == i * 2
def test_oop_inherit_ok():
    # Array1D inherits from `object`, so the call stack shows
    # 'class Array1D(object)' instead of '@ti.data_oriented'.
    # Make sure this also works.
    @ti.data_oriented
    class Array1D(object):
        def __init__(self, n, mul):
            self.n = n
            self.val = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.mul = mul
            ti.root.dense(ti.ij, (self.n, )).place(self.val)
            ti.root.place(self.total)

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                self.total[None] += self.val[i, j] * self.mul

    arr = Array1D(128, 42)
    ti.root.lazy_grad()

    with ti.Tape(loss=arr.total):
        arr.reduce()
    # The (self.n, ) shape is broadcast over ti.ij, so val is n-by-n.
    for i in range(arr.n):
        for j in range(arr.n):
            assert arr.val.grad[i, j] == 42
def test_oop_two_items():
    @ti.data_oriented
    class Array2D:
        def __init__(self, n, m, increment, multiplier):
            self.n = n
            self.m = m
            self.val = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.increment = increment
            self.multiplier = multiplier
            ti.root.dense(ti.ij, (self.n, self.m)).place(self.val)
            ti.root.place(self.total)

        @ti.kernel
        def inc(self):
            for i, j in self.val:
                self.val[i, j] += self.increment

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                self.total[None] += self.val[i, j] * self.multiplier

    arr1_inc, arr1_mult = 3, 4
    arr2_inc, arr2_mult = 6, 8
    arr1 = Array2D(128, 128, arr1_inc, arr1_mult)
    arr2 = Array2D(16, 32, arr2_inc, arr2_mult)
    ti.root.lazy_grad()

    arr1.inc()
    arr1.inc.grad()
    arr2.inc()
    arr2.inc.grad()
    assert arr1.val[3, 4] == arr1_inc
    assert arr2.val[8, 6] == arr2_inc

    with ti.Tape(loss=arr1.total):
        arr1.reduce()
    with ti.Tape(loss=arr2.total, clear_gradients=False):
        arr2.reduce()
    for i in range(arr1.n):
        for j in range(arr1.m):
            assert arr1.val.grad[i, j] == arr1_mult
    for i in range(arr2.n):
        for j in range(arr2.m):
            assert arr2.val.grad[i, j] == arr2_mult
def gradient(alpha, num_steps):
    damping[None] = math.exp(-dt * alpha)
    a[None] = 1
    with ti.Tape(loss):
        for i in range(1, num_steps):
            advance(i)
        compute_loss(num_steps - 1)
    return loss[None]
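# Once the tape closes, gradient() above leaves d(loss)/d(damping) in
# damping.grad[None] (assuming `damping` was allocated with a gradient field,
# e.g. needs_grad=True or ti.root.lazy_grad()). A hypothetical outer loop
# could chain that back to the Python-side scalar alpha by hand, since
# damping = exp(-dt * alpha) is computed outside Taichi; the learning rate
# and iteration counts below are made-up values:

learning_rate = 1e-1
alpha = 1.0
for it in range(100):
    l = gradient(alpha, 256)
    # Chain rule through the Python-side parameterization:
    # d(loss)/d(alpha) = d(loss)/d(damping) * d(damping)/d(alpha),
    # with d(damping)/d(alpha) = -dt * exp(-dt * alpha).
    dloss_dalpha = damping.grad[None] * (-dt) * math.exp(-dt * alpha)
    alpha -= learning_rate * dloss_dalpha
    print('iter', it, 'loss =', l, 'alpha =', alpha)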
def optimize(toi=True, visualize=True):
    global use_toi
    use_toi = toi

    for i in range(n_hidden):
        for j in range(n_input_states()):
            weights1[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_input_states())) * 0.5

    for i in range(n_springs):
        for j in range(n_hidden):
            # TODO: n_springs should be n_actuators
            weights2[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_springs)) * 1
    '''
    if visualize:
        clear_states()
        forward('initial{}'.format(robot_id))
    '''

    losses = []
    for iter in range(20):
        clear_states()
        with ti.Tape(loss):
            forward(visualize=visualize)

        print('Iter=', iter, 'Loss=', loss[None])

        total_norm_sqr = 0
        for i in range(n_hidden):
            for j in range(n_input_states()):
                total_norm_sqr += weights1.grad[i, j]**2
            total_norm_sqr += bias1.grad[i]**2
        for i in range(n_springs):
            for j in range(n_hidden):
                total_norm_sqr += weights2.grad[i, j]**2
            total_norm_sqr += bias2.grad[i]**2
        print(total_norm_sqr)

        gradient_clip = 0.2
        scale = learning_rate * min(
            1.0, gradient_clip / (total_norm_sqr**0.5 + 1e-4))
        for i in range(n_hidden):
            for j in range(n_input_states()):
                weights1[i, j] -= scale * weights1.grad[i, j]
            bias1[i] -= scale * bias1.grad[i]
        for i in range(n_springs):
            for j in range(n_hidden):
                weights2[i, j] -= scale * weights2.grad[i, j]
            bias2[i] -= scale * bias2.grad[i]
        losses.append(loss[None])

    return losses
def optimize(visualize):
    for i in range(n_hidden):
        for j in range(n_input_states()):
            weights1[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_input_states())) * 2

    for i in range(n_springs):
        for j in range(n_hidden):
            # TODO: n_springs should be n_actuators
            weights2[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_springs)) * 3

    losses = []
    # forward('initial{}'.format(robot_id), visualize=visualize)
    for iter in range(200):
        clear()

        import time
        t = time.time()
        with ti.Tape(loss):
            forward(visualize=iter % 10 == 0)
        print(time.time() - t, ' 1')

        print('Iter=', iter, 'Loss=', loss[None])

        total_norm_sqr = 0
        for i in range(n_hidden):
            for j in range(n_input_states()):
                total_norm_sqr += weights1.grad[i, j]**2
            total_norm_sqr += bias1.grad[i]**2
        for i in range(n_springs):
            for j in range(n_hidden):
                total_norm_sqr += weights2.grad[i, j]**2
            total_norm_sqr += bias2.grad[i]**2
        print(total_norm_sqr)

        # scale = learning_rate * min(1.0, gradient_clip / total_norm_sqr ** 0.5)
        gradient_clip = 0.1
        scale = gradient_clip / (total_norm_sqr**0.5 + 1e-6)
        for i in range(n_hidden):
            for j in range(n_input_states()):
                weights1[i, j] -= scale * weights1.grad[i, j]
            bias1[i] -= scale * bias1.grad[i]
        for i in range(n_springs):
            for j in range(n_hidden):
                weights2[i, j] -= scale * weights2.grad[i, j]
            bias2[i] -= scale * bias2.grad[i]
        losses.append(loss[None])
        print(time.time() - t, ' 2')

    losses = gaussian_filter(losses, 10)
    return losses