Example 1
def test_oop():
    @ti.data_oriented
    class Array2D:
        def __init__(self, n, m, increment):
            self.n = n
            self.m = m
            self.val = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.increment = increment

            ti.root.dense(ti.ij, (self.n, self.m)).place(self.val)
            ti.root.place(self.total)

        @ti.kernel
        def inc(self):
            for i, j in self.val:
                self.val[i, j] += self.increment

        @ti.kernel
        def inc2(self, increment: ti.i32):
            for i, j in self.val:
                self.val[i, j] += increment

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                self.total[None] += self.val[i, j] * 4

    arr = Array2D(128, 128, 3)

    double_total = ti.field(ti.f32)

    ti.root.place(double_total)
    ti.root.lazy_grad()

    arr.inc()
    arr.inc.grad()
    assert arr.val[3, 4] == 3
    arr.inc2(4)
    assert arr.val[3, 4] == 7

    with ti.Tape(loss=arr.total):
        arr.reduce()

    for i in range(arr.n):
        for j in range(arr.m):
            assert arr.val.grad[i, j] == 4

    @ti.kernel
    def double():
        double_total[None] = 2 * arr.total[None]

    with ti.Tape(loss=double_total):
        arr.reduce()
        double()

    for i in range(arr.n):
        for j in range(arr.m):
            assert arr.val.grad[i, j] == 8
Example 2
def test_oop_two_items():
    @ti.data_oriented
    class Array2D:
        def __init__(self, n, m, increment, multiplier):
            self.n = n
            self.m = m
            self.val = ti.var(ti.f32)
            self.total = ti.var(ti.f32)
            self.increment = increment
            self.multiplier = multiplier

        def place(self, root):
            root.dense(ti.ij, (self.n, self.m)).place(self.val)
            root.place(self.total)

        @ti.kernel
        def inc(self):
            for i, j in self.val:
                ti.atomic_add(self.val[i, j], self.increment)

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                ti.atomic_add(self.total, self.val[i, j] * self.multiplier)

    arr1_inc, arr1_mult = 3, 4
    arr2_inc, arr2_mult = 6, 8
    arr1 = Array2D(128, 128, arr1_inc, arr1_mult)
    arr2 = Array2D(16, 32, arr2_inc, arr2_mult)

    @ti.layout
    def place():
        # Place an object. Make sure you defined place for that obj
        ti.root.place(arr1)
        ti.root.place(arr2)
        ti.root.lazy_grad()

    arr1.inc()
    arr1.inc.grad()
    arr2.inc()
    arr2.inc.grad()
    assert arr1.val[3, 4] == arr1_inc
    assert arr2.val[8, 6] == arr2_inc

    with ti.Tape(loss=arr1.total):
        arr1.reduce()
    with ti.Tape(loss=arr2.total, clear_gradients=False):
        arr2.reduce()
    for i in range(arr1.n):
        for j in range(arr1.m):
            assert arr1.val.grad[i, j] == arr1_mult
    for i in range(arr2.n):
        for j in range(arr2.m):
            assert arr2.val.grad[i, j] == arr2_mult
Example 3
def test_stop_grad2():
    x = ti.var(ti.f32)
    loss = ti.var(ti.f32)

    n = 128

    @ti.layout
    def place():
        ti.root.dense(ti.i, n).place(x)
        ti.root.place(loss)
        ti.root.lazy_grad()

    @ti.kernel
    def func():
        # Two loops: one with stop_grad and one without
        for i in range(n):
            ti.stop_grad(x)
            ti.atomic_add(loss, x[i]**2)
        for i in range(n):
            ti.atomic_add(loss, x[i]**2)

    for i in range(n):
        x[i] = i

    with ti.Tape(loss):
        func()

    # Without stop_grad, x.grad[i] would be i * 4
    for i in range(n):
        assert x.grad[i] == i * 2
Example 4
def main():
    target_img = cv2.resize(cv2.imread('taichi.png'),
                            (n_grid, n_grid))[:, :, 0] / 255.0

    for i in range(n_grid):
        for j in range(n_grid):
            target[i, j] = target_img[i, j]
            smoke[0, i, j] = (i // 16 + j // 16) % 2

    for opt in range(num_iterations):
        with ti.Tape(loss):
            output = "outputs/opt{:03d}".format(opt) if opt % 10 == 0 else None
            forward(output)
            velocity_field = np.ones(shape=(n_grid, n_grid, 3),
                                     dtype=np.float32)
            for i in range(n_grid):
                for j in range(n_grid):
                    s = 0.2
                    b = 0.5
                    velocity_field[i, j, 0] = v[0, i, j][0] * s + b
                    velocity_field[i, j, 1] = v[0, i, j][1] * s + b
            cv2.imshow('velocity', velocity_field)
            cv2.waitKey(1)

        print('Iter', opt, ' Loss =', loss[None])
        apply_grad()

    forward("output")
Example 5
def main():
    # initialization
    target_img = cv2.imread('taichi.png')[:, :, 0] / 255.0
    target_img -= target_img.mean()
    target_img = cv2.resize(target_img, (n_grid, n_grid))
    cv2.imshow('target', target_img * amplify + 0.5)
    # print(target_img.min(), target_img.max())
    for i in range(n_grid):
        for j in range(n_grid):
            target[i, j] = float(target_img[i, j])

    if False:
        # this is not too exciting...
        initial[n_grid // 2, n_grid // 2] = -2
        forward('center')
        initial[n_grid // 2, n_grid // 2] = 0

    for opt in range(200):
        with ti.Tape(loss):
            output = None
            if opt % 20 == 19:
                output = 'wave/iter{:03d}/'.format(opt)
            forward(output)

        print('Iter', opt, ' Loss =', loss[None])

        apply_grad()

    forward('optimized')
Example 6
def test_oop_inherit_ok():
    # Array1D inherits from object, which makes the call stack show 'class Array1D(object)'
    # instead of '@ti.data_oriented'. Make sure this also works.
    @ti.data_oriented
    class Array1D(object):
        def __init__(self, n, mul):
            self.n = n
            self.val = ti.var(ti.f32)
            self.total = ti.var(ti.f32)
            self.mul = mul

        def place(self, root):
            root.dense(ti.ij, (self.n, )).place(self.val)
            root.place(self.total)

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                ti.atomic_add(self.total, self.val[i, j] * self.mul)

    arr = Array1D(128, 42)

    @ti.layout
    def place():
        # Place an object. Make sure you defined place for that obj
        ti.root.place(arr)
        ti.root.lazy_grad()

    with ti.Tape(loss=arr.total):
        arr.reduce()
    for i in range(arr.n):
        assert arr.val.grad[i] == 42
Example 7
def test_decorated_primal_missing_decorator():
    x = ti.field(ti.f32)
    total = ti.field(ti.f32)

    n = 128

    ti.root.dense(ti.i, n).place(x)
    ti.root.place(total)
    ti.root.lazy_grad()

    @ti.kernel
    def func(mul: ti.f32):
        for i in range(n):
            ti.atomic_add(total[None], x[i] * mul)

    def forward(mul):
        func(mul)
        func(mul)

    with pytest.raises(RuntimeError):

        @ti.ad.grad_for(func)
        def backward(mul):
            func.grad(mul)

    with ti.Tape(loss=total):
        func(4)
Example 8
def main():
    print("Loading initial and target states...")
    initial_smoke_img = cv2.imread("init_smoke.png")[:, :, 0] / 255.0
    target_img = cv2.resize(cv2.imread('taichi.png'),
                            (n_grid, n_grid))[:, :, 0] / 255.0

    for i in range(n_grid):
        for j in range(n_grid):
            target[i, j] = target_img[i, j]
            smoke[0, i, j] = initial_smoke_img[i, j]

    for opt in range(num_iterations):
        t = time.time()
        with ti.Tape(loss):
            output = "test" if opt % 10 == -1 else None
            forward(output)
        print('total time', (time.time() - t) * 1000, 'ms')

        print('Iter', opt, ' Loss =', loss[None])
        apply_grad()
        print("Compilation time:",
              ti.get_runtime().prog.get_total_compilation_time())
        # ti.profiler_print()

    forward("output")
Example 9
def substep():
    with ti.Tape(U):
        # Every kernel invocation within this scope contributes to the
        # partial derivatives of U with respect to input variables such as x.
        compute_U()  # also computes dU/dx and saves the result in x.grad
    advance()
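
For context, here is a minimal, self-contained sketch of this ti.Tape pattern. The quadratic potential (U = sum_i 0.5 * |x_i|^2), the field shapes, and the explicit update in advance() are illustrative assumptions, not part of the original snippet.

import taichi as ti

ti.init()

n = 8
dt = 1e-2
x = ti.Vector.field(2, dtype=ti.f32, shape=n, needs_grad=True)  # positions (input variable)
v = ti.Vector.field(2, dtype=ti.f32, shape=n)                   # velocities
U = ti.field(dtype=ti.f32, shape=(), needs_grad=True)           # scalar potential energy

@ti.kernel
def compute_U():
    for i in range(n):
        # Illustrative potential: U = sum_i 0.5 * |x_i|^2, so dU/dx_i = x_i
        U[None] += 0.5 * x[i].norm_sqr()

@ti.kernel
def advance():
    for i in range(n):
        v[i] -= dt * x.grad[i]  # x.grad holds dU/dx, filled in when the Tape scope exits
        x[i] += dt * v[i]

def substep():
    with ti.Tape(loss=U):
        compute_U()
    advance()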
Example 10
def test_kernel_template_gradient():
    x = ti.field(ti.f32)
    y = ti.field(ti.f32)
    z = ti.field(ti.f32)
    loss = ti.field(ti.f32)

    ti.root.dense(ti.i, 16).place(x, y, z)
    ti.root.place(loss)
    ti.root.lazy_grad()

    @ti.kernel
    def double(a: ti.template(), b: ti.template()):
        for i in range(16):
            b[i] = a[i] * 2 + 1

    @ti.kernel
    def compute_loss():
        for i in range(16):
            ti.atomic_add(loss[None], z[i])

    for i in range(16):
        x[i] = i

    with ti.Tape(loss):
        double(x, y)
        double(y, z)
        compute_loss()

    for i in range(16):
        assert z[i] == i * 4 + 3
        assert x.grad[i] == 4
Example 11
    def task():
        for i in range(10):
            with ti.Tape(loss=loss):
                # The forward kernel of compute_loss should be completely eliminated (except for the last one)
                compute_loss()

            accumulate_grad()
Example 12
def main():
    # set initial values of x component of velocity for container
    init_v[None] = 0.1

    # INITIALIZE SCENE
    scene = Scene()
    initialize_env(scene)
    scene.finalize()  # wrap up initialization by re-updating global scene parameters
    clear_physical_params(
        scene)  # clear physical parameters for new simulation

    losses = []
    init_v.grad[None] = 1
    for n in range(num_iters):
        # output defines the name of the directory where files are saved
        stuck.fill(0)
        ti.clear_all_gradients()
        if n % 1 == 0:
            output = 'obst_figs/iter{:04d}'.format(n)
        else:
            output = None

        with ti.Tape(loss):
            forward(output, visualize=False)

        print('Iter =', n, 'Loss =', loss[None], '                  ')

        init_v[None] -= learning_rate * init_v.grad[None]
        print('init_v =', init_v[None], '                  ')
        print('init_v.grad =', init_v.grad[None], '                  ')
Example 13
def optimize():
  initialize()
  forward(visualize=True, output='initial')
  
  losses = []
  for iter in range(200000):
    initialize()
    vis = iter % 200 == 0
    output = None
    if vis:
      output = 'iter{:05d}'.format(iter)
    with ti.Tape(loss):
      forward(visualize=vis, output=output)
    losses.append(loss[None])
    # print(iter, "loss", loss[None])
    if vis:
      print(iter, sum(losses))
      losses.clear()
    
    tot = 0
    for i in range(8):
      for j in range(n_hidden):
        weight1[i, j] = weight1[i, j] - weight1.grad[i, j] * learning_rate
        tot += weight1.grad[i, j] ** 2
    # print(tot)
    for j in range(n_hidden):
      bias1[j] = bias1[j] - bias1.grad[j] * learning_rate
    
    for i in range(n_hidden):
      for j in range(n_gravitation):
        weight2[i, j] = weight2[i, j] - weight2.grad[i, j] * learning_rate
    for j in range(n_gravitation):
      bias2[j] = bias2[j] - bias2.grad[j] * learning_rate
  
  forward(visualize=True, output='final')
Example 14
def main():
    """
    Differentiable programming framework can be found in
    https://taichi.readthedocs.io/en/latest/syntax.html#kernels
    """
    # read in figures
    img = cv2.imread('erythrocyte.png')[:, :, 0]
    # normalization
    img = img / 255.0
    img -= img.mean()
    img = cv2.resize(img, (n, n))
    for i in range(n):
        for j in range(n):
            u_hat[i, j] = float(img[i, j])

    losses = []
    for it in range(100):
        # encapsulate loss in taichi
        with ti.Tape(loss):
            forward()
        print('Iter {} Loss = {}'.format(it, loss[None]))
        losses.append(loss[None])
        # update gradient
        apply_grad()

    # output loss curve
    plt.set_xlabel("Iteration")
    plt.set_ylabel("Loss")
    plt.plot(losses)
    plt.show()
Example 15
def test_ad_frac():
    @ti.func
    def frac(x):
        fractional = x - ti.floor(x) if x > 0. else x - ti.ceil(x)
        return fractional

    @ti.kernel
    def ti_frac(input_field: ti.template(), output_field: ti.template()):
        for i in input_field:
            output_field[i] = frac(input_field[i])**2

    @ti.kernel
    def calc_loss(input_field: ti.template(), loss: ti.template()):
        for i in input_field:
            loss[None] += input_field[i]

    n = 10
    field0 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True)
    randoms = np.random.randn(10).astype(np.float32)
    field0.from_numpy(randoms)
    field1 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True)
    loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)

    with ti.Tape(loss):
        ti_frac(field0, field1)
        calc_loss(field1, loss)

    grads = field0.grad.to_numpy()
    expected = np.modf(randoms)[0] * 2
    for i in range(n):
        assert grads[i] == test_utils.approx(expected[i], rel=1e-4)
Example 16
def substep():
    with ti.Tape(U):
        # Kernel invocations in this scope contribute to partial derivatives of
        # U with respect to input variables such as x.
        compute_U()  # The tape will automatically compute dU/dx and save the results in x.grad
    advance()
Example 17
def test_complex_kernels():
    x = ti.var(ti.f32)
    total = ti.var(ti.f32)

    n = 128

    @ti.layout
    def place():
        ti.root.dense(ti.i, n).place(x)
        ti.root.place(total)
        ti.root.lazy_grad()

    @ti.kernel
    def func(mul: ti.f32):
        for i in range(n):
            ti.atomic_add(total[None], x[i] * mul)

    @ti.complex_kernel
    def forward(mul):
        func(mul)
        func(mul)

    @ti.complex_kernel_grad(forward)
    def backward(mul):
        func.grad(mul)

    with ti.Tape(loss=total):
        forward(4)
    for i in range(n):
        assert x.grad[i] == 4
Example 18
def test_complex_kernels_oop():
    @ti.data_oriented
    class A:
        def __init__(self):
            self.x = ti.var(ti.f32)
            self.total = ti.var(ti.f32)
            self.n = 128

            ti.root.dense(ti.i, self.n).place(self.x)
            ti.root.place(self.total)

        @ti.kernel
        def func(self, mul: ti.f32):
            for i in range(self.n):
                ti.atomic_add(self.total[None], self.x[i] * mul)

        @ti.complex_kernel
        def forward(self, mul):
            self.func(mul)
            self.func(mul)

        @ti.complex_kernel_grad(forward)
        def backward(self, mul):
            self.func.grad(mul)

    a = A()

    ti.root.lazy_grad()

    with ti.Tape(loss=a.total):
        a.forward(4)
    for i in range(a.n):
        assert a.x.grad[i] == 4
Example 19
def test_multiple_ib_inner_mixed():
    x = ti.field(float, (), needs_grad=True)
    y = ti.field(float, (), needs_grad=True)

    @ti.kernel
    def compute_y():
        for j in range(2):
            for i in range(3):
                y[None] += x[None]
            for i in range(3):
                for ii in range(2):
                    y[None] += x[None]
                for iii in range(2):
                    y[None] += x[None]
                    for iiii in range(2):
                        y[None] += x[None]
            for i in range(3):
                for ii in range(2):
                    for iii in range(2):
                        y[None] += x[None]

    x[None] = 1.0
    with ti.Tape(y):
        compute_y()

    assert y[None] == 78.0
    assert x.grad[None] == 78.0
Example 20
def main():
    initialize()

    vertices_ = vertices.to_numpy()

    while gui.running and not gui.get_event(gui.ESCAPE):
        for s in range(int(1e-2 // dt)):
            grid_m.fill(0)
            grid_v.fill(0)
            # Note that we are now differentiating the total energy w.r.t. the particle position.
            # Recall that F = - \partial (total_energy) / \partial x
            with ti.Tape(total_energy):
                # Do the forward computation of total energy and backward propagation for x.grad, which is later used in p2g
                compute_total_energy()
                # It's OK not to use the computed total_energy at all, since we only need x.grad
            p2g()
            grid_op()
            g2p()

        gui.circle((0.5, 0.5), radius=45, color=0x068587)
        particle_pos = x.to_numpy()
        a = vertices_.reshape(n_elements * 3)
        b = np.roll(vertices_, shift=1, axis=1).reshape(n_elements * 3)
        gui.lines(particle_pos[a], particle_pos[b], radius=1, color=0x4FB99F)
        gui.circles(particle_pos, radius=1.5, color=0xF2B134)
        gui.line((0.00, 0.03 / quality), (1.0, 0.03 / quality),
                 color=0xFFFFFF,
                 radius=3)
        gui.show()
        ti.kernel_profiler_print()
Example 21
def test_customized_kernels_oop2():
    @ti.data_oriented
    class A:
        def __init__(self):
            self.x = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.n = 128

            ti.root.dense(ti.i, self.n).place(self.x)
            ti.root.place(self.total)

        @ti.kernel
        def func(self, mul: ti.f32):
            for i in range(self.n):
                ti.atomic_add(self.total[None], self.x[i] * mul)

        def func_proxy(self, mul):
            self.func(mul)

        @ti.ad.grad_replaced
        def forward(self, mul):
            self.func_proxy(mul)
            self.func_proxy(mul)

        @ti.ad.grad_for(forward)
        def backward(self, mul):
            self.func.grad(mul)

    a = A()

    ti.root.lazy_grad()

    with ti.Tape(loss=a.total):
        a.forward(4)
    assert a.x.grad[0] == 4
Example 22
def main():
    init_mesh()
    init_pos()
    gravity[None] = [0, -1]

    print(
        "[Hint] Use WSAD/arrow keys to control gravity. Use left/right mouse bottons to attract/repel. Press R to reset."
    )
    while window.running:
        for e in window.get_events(ti.ui.PRESS):
            if e.key == ti.ui.ESCAPE:
                window.running = False
            elif e.key == 'r':
                init_pos()
            elif e.key in ('a', ti.ui.LEFT):
                gravity[None] = [-1, 0]
            elif e.key in ('d', ti.ui.RIGHT):
                gravity[None] = [+1, 0]
            elif e.key in ('s', ti.ui.DOWN):
                gravity[None] = [0, -1]
            elif e.key in ('w', ti.ui.UP):
                gravity[None] = [0, +1]

        mouse_pos = window.get_cursor_pos()
        attractor_pos[None] = mouse_pos
        attractor_strength[None] = window.is_pressed(
            ti.ui.LMB) - window.is_pressed(ti.ui.RMB)
        for i in range(50):
            with ti.Tape(loss=U):
                update_U()
            advance()
        render()
        window.show()
Example 23
def test_customized_kernels_tape():
    x = ti.field(ti.f32)
    total = ti.field(ti.f32)

    n = 128

    ti.root.dense(ti.i, n).place(x)
    ti.root.place(total)
    ti.root.lazy_grad()

    @ti.kernel
    def func(mul: ti.f32):
        for i in range(n):
            ti.atomic_add(total[None], x[i] * mul)

    @ti.ad.grad_replaced
    def forward(mul):
        func(mul)
        func(mul)

    @ti.ad.grad_for(forward)
    def backward(mul):
        func.grad(mul)

    with ti.Tape(loss=total):
        forward(4)
    assert x.grad[0] == 4
Example 24
def test_stop_grad2():
    x = ti.field(ti.f32)
    loss = ti.field(ti.f32)

    n = 128

    ti.root.dense(ti.i, n).place(x)
    ti.root.place(loss)
    ti.root.lazy_grad()

    @ti.kernel
    def func():
        # Two loops: one with stop_grad and one without
        for i in range(n):
            ti.stop_grad(x)
            loss[None] += x[i]**2
        for i in range(n):
            loss[None] += x[i]**2

    for i in range(n):
        x[i] = i

    with ti.Tape(loss):
        func()

    # Without stop_grad, x.grad[i] would be i * 4
    for i in range(n):
        assert x.grad[i] == i * 2
Example 25
def test_normal_grad():
    x = ti.var(ti.f32)
    loss = ti.var(ti.f32)

    n = 128

    @ti.layout
    def place():
        ti.root.dense(ti.i, n).place(x)
        ti.root.place(loss)
        ti.root.lazy_grad()

    @ti.kernel
    def func():
        for i in range(n):
            ti.atomic_add(loss, x[i]**2)

    for i in range(n):
        x[i] = i

    with ti.Tape(loss):
        func()

    for i in range(n):
        assert x.grad[i] == i * 2
Example 26
def test_oop_inherit_ok():
    # Array1D inherits from object, which makes the call stack show 'class Array1D(object)'
    # instead of '@ti.data_oriented'. Make sure this also works.
    @ti.data_oriented
    class Array1D(object):
        def __init__(self, n, mul):
            self.n = n
            self.val = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.mul = mul
            ti.root.dense(ti.ij, (self.n, )).place(self.val)
            ti.root.place(self.total)

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                self.total[None] += self.val[i, j] * self.mul

    arr = Array1D(128, 42)

    ti.root.lazy_grad()

    with ti.Tape(loss=arr.total):
        arr.reduce()
    for i in range(arr.n):
        for j in range(arr.n):
            assert arr.val.grad[i, j] == 42
Example 27
def test_oop_two_items():
    @ti.data_oriented
    class Array2D:
        def __init__(self, n, m, increment, multiplier):
            self.n = n
            self.m = m
            self.val = ti.field(ti.f32)
            self.total = ti.field(ti.f32)
            self.increment = increment
            self.multiplier = multiplier
            ti.root.dense(ti.ij, (self.n, self.m)).place(self.val)
            ti.root.place(self.total)

        @ti.kernel
        def inc(self):
            for i, j in self.val:
                self.val[i, j] += self.increment

        @ti.kernel
        def reduce(self):
            for i, j in self.val:
                self.total[None] += self.val[i, j] * self.multiplier

    arr1_inc, arr1_mult = 3, 4
    arr2_inc, arr2_mult = 6, 8
    arr1 = Array2D(128, 128, arr1_inc, arr1_mult)
    arr2 = Array2D(16, 32, arr2_inc, arr2_mult)

    ti.root.lazy_grad()

    arr1.inc()
    arr1.inc.grad()
    arr2.inc()
    arr2.inc.grad()
    assert arr1.val[3, 4] == arr1_inc
    assert arr2.val[8, 6] == arr2_inc

    with ti.Tape(loss=arr1.total):
        arr1.reduce()
    with ti.Tape(loss=arr2.total, clear_gradients=False):
        arr2.reduce()
    for i in range(arr1.n):
        for j in range(arr1.m):
            assert arr1.val.grad[i, j] == arr1_mult
    for i in range(arr2.n):
        for j in range(arr2.m):
            assert arr2.val.grad[i, j] == arr2_mult
Example 28
def gradient(alpha, num_steps):
    damping[None] = math.exp(-dt * alpha)
    a[None] = 1
    with ti.Tape(loss):
        for i in range(1, num_steps):
            advance(i)
        compute_loss(num_steps - 1)
    return loss[None]
Example 29
def optimize(toi=True, visualize=True):
    global use_toi
    use_toi = toi

    for i in range(n_hidden):
        for j in range(n_input_states()):
            weights1[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_input_states())) * 0.5

    for i in range(n_springs):
        for j in range(n_hidden):
            # TODO: n_springs should be n_actuators
            weights2[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_springs)) * 1
    '''
  if visualize:
    clear_states()
    forward('initial{}'.format(robot_id))
  '''

    losses = []
    for iter in range(20):
        clear_states()

        with ti.Tape(loss):
            forward(visualize=visualize)

        print('Iter=', iter, 'Loss=', loss[None])

        total_norm_sqr = 0
        for i in range(n_hidden):
            for j in range(n_input_states()):
                total_norm_sqr += weights1.grad[i, j]**2
            total_norm_sqr += bias1.grad[i]**2

        for i in range(n_springs):
            for j in range(n_hidden):
                total_norm_sqr += weights2.grad[i, j]**2
            total_norm_sqr += bias2.grad[i]**2

        print(total_norm_sqr)

        gradient_clip = 0.2
        scale = learning_rate * min(
            1.0, gradient_clip / (total_norm_sqr**0.5 + 1e-4))
        for i in range(n_hidden):
            for j in range(n_input_states()):
                weights1[i, j] -= scale * weights1.grad[i, j]
            bias1[i] -= scale * bias1.grad[i]

        for i in range(n_springs):
            for j in range(n_hidden):
                weights2[i, j] -= scale * weights2.grad[i, j]
            bias2[i] -= scale * bias2.grad[i]

        losses.append(loss[None])
    return losses

def optimize(visualize):
    for i in range(n_hidden):
        for j in range(n_input_states()):
            weights1[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_input_states())) * 2

    for i in range(n_springs):
        for j in range(n_hidden):
            # TODO: n_springs should be n_actuators
            weights2[i, j] = np.random.randn() * math.sqrt(
                2 / (n_hidden + n_springs)) * 3

    losses = []
    # forward('initial{}'.format(robot_id), visualize=visualize)
    for iter in range(200):
        clear()

        import time
        t = time.time()
        with ti.Tape(loss):
            forward(visualize=iter % 10 == 0)
        print(time.time() - t, ' 1')

        print('Iter=', iter, 'Loss=', loss[None])

        total_norm_sqr = 0
        for i in range(n_hidden):
            for j in range(n_input_states()):
                total_norm_sqr += weights1.grad[i, j]**2
            total_norm_sqr += bias1.grad[i]**2

        for i in range(n_springs):
            for j in range(n_hidden):
                total_norm_sqr += weights2.grad[i, j]**2
            total_norm_sqr += bias2.grad[i]**2

        print(total_norm_sqr)

        # scale = learning_rate * min(1.0, gradient_clip / total_norm_sqr ** 0.5)
        gradient_clip = 0.1
        scale = gradient_clip / (total_norm_sqr**0.5 + 1e-6)
        for i in range(n_hidden):
            for j in range(n_input_states()):
                weights1[i, j] -= scale * weights1.grad[i, j]
            bias1[i] -= scale * bias1.grad[i]

        for i in range(n_springs):
            for j in range(n_hidden):
                weights2[i, j] -= scale * weights2.grad[i, j]
            bias2[i] -= scale * bias2.grad[i]
        losses.append(loss[None])

        print(time.time() - t, ' 2')

    losses = gaussian_filter(losses, 10)
    return losses