Python kernel_profiler_print 예제들, taichi.kernel_profiler_print Python 예제들

예제 #1

0

파일 보기

파일: mpm_solver.py 프로젝트: jackylovechina/taichi_elements

    def step(self, frame_dt, print_stat=False):
        """Advance the solver by one frame of length ``frame_dt``.

        The frame is divided into ``int(frame_dt / self.default_dt) + 1``
        equally sized substeps.  Every substep deactivates the grid, calls
        ``build_pid``, then runs ``p2g``, ``grid_normalization_and_gravity``,
        each callback in ``self.grid_postprocess`` and finally ``g2p``.

        Args:
            frame_dt: Length of the frame to simulate.
            print_stat: When true, print the Taichi kernel profiler output,
                the memory profiler output (if it can be produced), the
                particle count and wall-clock timings for this call.
        """
        begin_t = time.time()
        begin_substep = self.total_substeps

        substeps = int(frame_dt / self.default_dt) + 1

        for _ in range(substeps):
            self.total_substeps += 1
            dt = frame_dt / substeps

            self.grid.deactivate_all()
            self.build_pid()
            self.p2g(dt)
            self.grid_normalization_and_gravity(dt)
            for p in self.grid_postprocess:
                p(dt)
            self.g2p(dt)

        if print_stat:
            ti.kernel_profiler_print()
            try:
                ti.memory_profiler_print()
            except Exception:
                # Best effort: the memory profiler is optional here, but do
                # not swallow KeyboardInterrupt / SystemExit like a bare
                # ``except:`` would.
                pass
            print(f'num particles={self.n_particles[None]}')
            # Sample the clock once so both timings describe the same interval.
            elapsed = time.time() - begin_t
            substeps_done = self.total_substeps - begin_substep
            print(f'  frame time {elapsed:.3f} s')
            print(f'  substep time {1000 * elapsed / substeps_done:.3f} ms')

예제 #2

0

파일 보기

파일: mpm_lagrangian_forces.py 프로젝트: zh010zh/taichi

def main():
    """Run the simulation and draw it until the GUI closes or ESC is pressed.

    Each displayed frame runs ``int(1e-2 // dt)`` simulation substeps, then
    draws the mesh edges and particles and prints the kernel profiler output.
    """
    initialize()

    # Host-side copy of the per-element vertex indices, fetched once up front.
    element_vertices = vertices.to_numpy()

    while gui.running and not gui.get_event(gui.ESCAPE):
        for _ in range(int(1e-2 // dt)):
            grid_m.fill(0)
            grid_v.fill(0)
            # The total energy is differentiated w.r.t. the particle
            # positions: F = - \partial (total_energy) / \partial x.
            # The tape runs the forward energy computation and the backward
            # pass that fills x.grad, which p2g uses afterwards.  The energy
            # value itself does not need to be read.
            with ti.Tape(total_energy):
                compute_total_energy()
            p2g()
            grid_op()
            g2p()

        gui.circle((0.5, 0.5), radius=45, color=0x068587)

        positions = x.to_numpy()
        # Pair every vertex index with its neighbour inside the same element
        # (rolled by one along axis 1) to obtain the edge endpoints.
        flat_len = n_elements * 3
        edge_begin = element_vertices.reshape(flat_len)
        edge_end = np.roll(element_vertices, shift=1, axis=1).reshape(flat_len)
        gui.lines(positions[edge_begin],
                  positions[edge_end],
                  radius=1,
                  color=0x4FB99F)
        gui.circles(positions, radius=1.5, color=0xF2B134)

        line_y = 0.03 / quality
        gui.line((0.00, line_y), (1.0, line_y), color=0xFFFFFF, radius=3)

        gui.show()
        ti.kernel_profiler_print()

예제 #3

0

파일 보기

    def run(self):
        """Initialise, solve, paint, show the image and print kernel timings.

        The solve is capped at 400 iterations.
        """
        # NOTE(review): the GUI object is created but never used below;
        # kept because constructing it may open a window.
        window_res = (self.N_gui, self.N_gui)
        gui = ti.GUI("Multigrid Preconditioned Conjugate Gradients",
                     res=window_res)

        self.init()
        self.solve(max_iters=400)

        # Render the result into self.pixels and display it.
        self.paint()
        ti.imshow(self.pixels)

        ti.kernel_profiler_print()

예제 #4

0

파일 보기

파일: mpm_solver.py 프로젝트: TREYWANGCQU/taichi_elements

    def step(self, frame_dt, print_stat=False):
        """Advance the solver by one frame of length ``frame_dt``.

        The frame is divided into ``int(frame_dt / self.default_dt) + 1``
        equally sized substeps; a ``.`` is printed for every substep and a
        newline once the frame is done.

        When ``self.use_g2p2g`` is set, each substep writes into the grid
        that is not the current input (``1 - self.input_grid``) and then
        makes it the input for the next substep.  Otherwise a single grid is
        used with separate ``p2g`` and ``g2p`` calls.  In both cases
        ``self.t`` is advanced by the substep length and
        ``self.all_time_max_velocity`` is updated after the last substep.

        Args:
            frame_dt: Length of the frame to simulate.
            print_stat: When true, print the substep count up front and,
                afterwards, the Taichi kernel profiler output, the memory
                profiler output (if it can be produced), the CFL number, the
                particle count and wall-clock timings for this call.
        """
        begin_t = time.time()
        begin_substep = self.total_substeps

        substeps = int(frame_dt / self.default_dt) + 1

        if print_stat:
            print(f'needed substeps: {substeps}')
        for _ in range(substeps):
            print('.', end='', flush=True)
            self.total_substeps += 1
            dt = frame_dt / substeps

            if self.use_g2p2g:
                output_grid = 1 - self.input_grid
                self.grid[output_grid].deactivate_all()
                self.build_pid(self.pid[self.input_grid],
                               self.grid_m[self.input_grid], 0.5)
                self.g2p2g(dt, self.pid[self.input_grid],
                           self.grid_v[self.input_grid],
                           self.grid_v[output_grid], self.grid_m[output_grid])
                self.grid_normalization_and_gravity(dt,
                                                    self.grid_v[output_grid],
                                                    self.grid_m[output_grid])
                for p in self.grid_postprocess:
                    p(self.t, dt, self.grid_v[output_grid])
                # The grid just written becomes the input of the next substep.
                self.input_grid = output_grid
                self.t += dt
            else:
                self.grid.deactivate_all()
                self.build_pid(self.pid, self.grid_m, 0.5)
                self.p2g(dt)
                self.grid_normalization_and_gravity(dt, self.grid_v,
                                                    self.grid_m)
                for p in self.grid_postprocess:
                    p(self.t, dt, self.grid_v)
                self.t += dt
                self.g2p(dt)
        self.all_time_max_velocity = max(self.all_time_max_velocity,
                                         self.compute_max_velocity())
        print()

        if print_stat:
            ti.kernel_profiler_print()
            try:
                ti.memory_profiler_print()
            except Exception:
                # Best effort: the memory profiler is optional here, but do
                # not swallow KeyboardInterrupt / SystemExit like a bare
                # ``except:`` would.
                pass
            # ``dt`` is the length of the substeps executed above.
            print(f'CFL: {self.all_time_max_velocity * dt / self.dx}')
            print(f'num particles={self.n_particles[None]}')
            # Sample the clock once so both timings describe the same interval.
            elapsed = time.time() - begin_t
            substeps_done = self.total_substeps - begin_substep
            print(f'  frame time {elapsed:.3f} s')
            print(f'  substep time {1000 * elapsed / substeps_done:.3f} ms')

예제 #5

0

파일 보기

def print_async_stats(include_kernel_profiler=False):
    """Print a short report of Taichi kernel statistics counters.

    Args:
        include_kernel_profiler: When true, print the kernel profiler output
            followed by a blank line before the report.
    """
    import taichi as ti

    if include_kernel_profiler:
        ti.kernel_profiler_print()
        print()

    counters = ti.get_kernel_stats().get_counters()

    def as_int(key):
        # Counters are shown as whole numbers.
        return int(counters[key])

    print('=======================')
    print('Async benchmark metrics')
    print('-----------------------')
    print(f'Async mode:           {ti.current_cfg().async_mode}')
    print(f'Kernel time:          {ti.kernel_profiler_total_time():.3f} s')
    print(f'Tasks launched:       {as_int("launched_tasks")}')
    print(f'Instructions emitted: {as_int("codegen_statements")}')
    print(f'Tasks compiled:       {as_int("codegen_offloaded_tasks")}')
    print('=======================')

예제 #6

0

파일 보기

파일: diffmpm_benchmark.py 프로젝트: zeta1999/difftaichi

def benchmark():
    """Time the forward and backward kernel sequences and print the results.

    Each phase runs its three kernels once as a warm-up, clears the kernel
    profiler, then runs them ``iters`` times between ``ti.sync()`` calls and
    prints the wall-clock figure together with the kernel profiler output.
    """
    print(
        'Also check "nvprof --print-gpu-trace python3 diffmpm_benchmark.py" for more accurate results'
    )
    iters = 100000

    def forward_pass():
        p2g(0)
        grid_op()
        g2p(0)

    # Forward: one warm-up pass, then the timed loop.
    forward_pass()
    ti.sync()
    ti.kernel_profiler_clear()
    start = time.time()
    for _ in range(iters):
        forward_pass()
    ti.sync()
    print('forward ', (time.time() - start) / iters * 1000 * 3, 'ms')
    ti.kernel_profiler_print()

    # Backward warm-up calls the gradient kernels in forward order ...
    p2g.grad(0)
    grid_op.grad()
    g2p.grad(0)
    ti.sync()
    ti.kernel_profiler_clear()
    start = time.time()
    # ... while the timed loop calls them in reverse order.
    for _ in range(iters):
        g2p.grad(0)
        grid_op.grad()
        p2g.grad(0)
    ti.sync()
    print('backward ', (time.time() - start) / iters * 1000 * 3, 'ms')
    ti.kernel_profiler_print()

예제 #7

0

파일 보기

파일: main.py 프로젝트: JYLeeLYJ/Fluid-Engine-Dev-on-Taichi

from smoke_animation import Smoke_Animation
from colliders import RigidBodyCollier, Collider
from geometry import Box, Ball

res = (512, 512)

# Enable the kernel profiler so the report printed at the end has data.
ti.init(arch=ti.gpu, kernel_profiler=True)
gui = ti.GUI("smoke animation", res=res)

# Build the smoke solver through the fluent builder.
smoke = (
    Smoke_Builder(res)
    .add_flow_emitter([512 // 2, 0], 512 // 3, 2000.0)
    .set_decay(0.995)
    .build()
)

# Builder step that is currently disabled:
# .set_compute_buoyancy_force(tempreture_factor=600)

ani = Smoke_Animation(smoke, res)
ani.reset()

# Colliders: one box and one ball.
smoke.add_collider(
    RigidBodyCollier(Box([156, 156], [226, 276]))
)
smoke.add_collider(
    RigidBodyCollier(Ball([276, 226], 30))
)

# Animate until the window is closed, then report kernel timings.
while gui.running:
    ani.update()
    ani.display(gui)

ti.kernel_profiler_print()

예제 #8

0

파일 보기

파일: bls_test_template.py 프로젝트: isdanni/taichi

def bls_test_template(dim,
                      N,
                      bs,
                      stencil,
                      block_dim=None,
                      scatter=False,
                      benchmark=0,
                      dense=False):
    """Run a stencil kernel with and without ``ti.cache_shared`` and compare.

    Three ``ti.i32`` fields are created: ``x`` (input), ``y`` (result with
    ``use_bls=True``) and ``y2`` (result with ``use_bls=False``).  After both
    variants of ``apply`` have run, ``check`` compares ``y`` against ``y2``
    element by element, the kernel profiler output is printed, and the
    function asserts that no mismatch was recorded.

    Args:
        dim: Number of index dimensions (``ti.indices(*range(dim))``).
        N: Per-axis size; the top-level grid has ``N // bs[i]`` cells on
            axis ``i``.
        bs: Block size, either one value used for every axis or a
            tuple/list with one entry per axis.
        stencil: Iterable of offsets; each is wrapped in ``ti.Vector`` and
            added to the loop index.
        block_dim: Value passed to ``ti.block_dim``; defaults to the product
            of the per-axis block sizes.
        scatter: If true, ``apply`` accumulates ``x[I]`` into
            ``y[I + offset]``; otherwise it sums ``x[I + offset]`` into
            ``y[I]``.
        benchmark: If non-zero, the number of times the
            deactivate / populate / apply sequence is repeated instead of a
            single apply pair.
        dense: If true the top-level SNode is ``dense``, else ``pointer``.

    Raises:
        AssertionError: If ``check`` found any element where ``y`` and
            ``y2`` differ.
    """
    x, y, y2 = ti.field(ti.i32), ti.field(ti.i32), ti.field(ti.i32)

    index = ti.indices(*range(dim))
    # 0-D flag set to 1 by check() on the first mismatch.
    mismatch = ti.field(ti.i32, shape=())

    # Broadcast a single block size to every axis.
    if not isinstance(bs, (tuple, list)):
        bs = [bs for _ in range(dim)]

    grid_size = [N // bs[i] for i in range(dim)]

    if dense:
        create_block = lambda: ti.root.dense(index, grid_size)
    else:
        create_block = lambda: ti.root.pointer(index, grid_size)

    if scatter:
        # All three fields hang off the same top-level block.
        block = create_block()

        block.dense(index, bs).place(x)
        block.dense(index, bs).place(y)
        block.dense(index, bs).place(y2)
    else:
        # Each field gets its own top-level block.
        create_block().dense(index, bs).place(x)
        create_block().dense(index, bs).place(y)
        create_block().dense(index, bs).place(y2)

    # Per-axis (begin, end) pairs that leave a margin of one block size at
    # both ends.
    # NOTE(review): this is a generator expression, so it can be unpacked
    # only once; populate() unpacks it via ti.ndrange(*ndrange) -- confirm
    # that this happens a single time even when populate() is called again
    # in benchmark mode.
    ndrange = ((bs[i], N - bs[i]) for i in range(dim))

    if block_dim is None:
        block_dim = 1
        for i in range(dim):
            block_dim *= bs[i]

    # Fill x[I] with sum_i I[i] ** (i + 1) over the ndrange above.
    @ti.kernel
    def populate():
        for I in ti.grouped(ti.ndrange(*ndrange)):
            s = 0
            for i in ti.static(range(dim)):
                s += I[i]**(i + 1)
            x[I] = s

    # Apply the stencil from x into the field passed as ``y``.  With
    # ``use_bls`` set, ti.cache_shared is requested for x (gather) or for
    # y (scatter).
    @ti.kernel
    def apply(use_bls: ti.template(), y: ti.template()):
        if ti.static(use_bls and not scatter):
            ti.cache_shared(x)
        if ti.static(use_bls and scatter):
            ti.cache_shared(y)

        ti.block_dim(block_dim)
        for I in ti.grouped(x):
            if ti.static(scatter):
                for offset in ti.static(stencil):
                    y[I + ti.Vector(offset)] += x[I]
            else:
                # gather
                s = 0
                for offset in ti.static(stencil):
                    s = s + x[I + ti.Vector(offset)]
                y[I] = s

    populate()

    if benchmark:
        for i in range(benchmark):
            # Reset the fields before every repetition; x is re-populated
            # only in gather mode.
            x.snode.parent().deactivate_all()
            if not scatter:
                populate()
            y.snode.parent().deactivate_all()
            y2.snode.parent().deactivate_all()
            apply(False, y2)
            apply(True, y)
    else:
        # Simply test
        apply(False, y2)
        apply(True, y)

    # Compare the two results; report and flag every differing element.
    @ti.kernel
    def check():
        for I in ti.grouped(y2):
            if y[I] != y2[I]:
                print('check failed', I, y[I], y2[I])
                mismatch[None] = 1

    check()

    ti.kernel_profiler_print()

    assert mismatch[None] == 0

예제 #9

0

파일 보기

파일: diffmpm_benchmark.py 프로젝트: zeta1999/difftaichi

def main():
    """Optimise the initial velocity by gradient descent and plot the loss.

    After initialising the fields and running ``benchmark()``, 30 gradient
    descent iterations are performed.  Each iteration records the simulation
    on a ``ti.Tape``, updates ``init_v`` from its gradient, shows selected
    time steps with OpenCV and prints the kernel profiler output.  The loss
    history is plotted at the end.
    """
    # initialization
    init_v[None] = [0, 0]

    for particle in range(n_particles):
        F[0, particle] = [[1, 0], [0, 1]]

    for row in range(N):
        for col in range(N):
            x[0, row * N + col] = [
                dx * (row * 0.5 + 10),
                dx * (col * 0.5 + 25),
            ]

    set_v()
    benchmark()

    losses = []
    img_count = 0
    learning_rate = 10
    scale = 4
    side = scale * n_grid

    for _ in range(30):
        with ti.Tape(loss=loss):
            set_v()
            for s in range(steps - 1):
                substep(s)

            loss[None] = 0
            x_avg[None] = [0, 0]
            compute_x_avg()
            compute_loss()

        current_loss = loss[None]
        losses.append(current_loss)
        grad = init_v.grad[None]
        print('loss=', current_loss, '   grad=', (grad[0], grad[1]))

        # Gradient descent update, one component at a time.
        init_v(0)[None] -= learning_rate * grad[0]
        init_v(1)[None] -= learning_rate * grad[1]

        # visualize
        for s in range(63, steps, 64):
            img = np.full((side, side), 0.3)
            sum_px = 0
            sum_py = 0
            for particle in range(n_particles):
                px = int(scale * x(0)[s, particle] / dx)
                py = int(scale * x(1)[s, particle] / dx)
                sum_px += px
                sum_py += py
                img[px, py] = 1

            # Mean particle pixel; the coordinates are swapped for OpenCV.
            mean_pixel = (sum_py // n_particles, sum_px // n_particles)
            cv2.circle(img, mean_pixel, radius=5, color=0, thickness=5)

            target_pixel = (int(target[1] * scale * n_grid),
                            int(target[0] * scale * n_grid))
            cv2.circle(img, target_pixel, radius=5, color=1, thickness=5)

            img = img.swapaxes(0, 1)[::-1]
            cv2.imshow('MPM', img)
            img_count += 1
            cv2.waitKey(1)
        ti.kernel_profiler_print()

    ti.kernel_profiler_print()
    plt.title("Optimization of Initial Velocity")
    plt.ylabel("Loss")
    plt.xlabel("Gradient Descent Iterations")
    plt.plot(losses)
    plt.show()

예제 #10

0

파일 보기

 def run(self, verbose=False):
     """Initialise, solve, paint, show the image and print kernel timings.

     Args:
         verbose: Forwarded unchanged to ``self.solve``.
     """
     self.init()

     # The solve is capped at 400 iterations.
     iteration_cap = 400
     self.solve(max_iters=iteration_cap, verbose=verbose)

     # Render the result into self.pixels and display it.
     self.paint()
     ti.imshow(self.pixels)

     ti.kernel_profiler_print()

예제 #11

0

파일 보기

    def run(self):
        """Run up to 400 preconditioned CG iterations, showing each one.

        The loop stops early once r^T r falls below ``1e-12`` times its
        initial value.  After every non-terminating iteration the residual
        is printed and the painted image is shown in the GUI.  The kernel
        profiler output is printed at the end.
        """
        gui = ti.GUI("Multigrid Preconditioned Conjugate Gradients",
                     res=(self.N_gui, self.N_gui))

        def dot(lhs, rhs):
            # The result of reduce() is read back from self.sum.
            self.reduce(lhs, rhs)
            return self.sum[None]

        def precondition():
            # z = M^-1 r with multigrid, otherwise simply z = r.
            if self.use_multigrid:
                self.apply_preconditioner()
            else:
                self.z[0].copy_from(self.r[0])

        self.init()

        rr_initial = dot(self.r[0], self.r[0])

        # r = b - Ax = b since x = 0; the first search direction p is
        # derived from the preconditioned residual z.
        precondition()
        self.update_p()

        zr_prev = dot(self.z[0], self.r[0])

        # CG
        for iteration in range(400):
            # alpha = z^T r / p^T A p
            self.compute_Ap()
            self.alpha[None] = zr_prev / dot(self.p, self.Ap)

            # x = x + alpha p
            self.update_x()

            # r = r - alpha Ap
            self.update_r()

            # check for convergence
            rr = dot(self.r[0], self.r[0])
            if rr < rr_initial * 1.0e-12:
                break

            # z = M^-1 r
            precondition()

            # beta = new z^T r / old z^T r
            zr_next = dot(self.z[0], self.r[0])
            self.beta[None] = zr_next / zr_prev

            # p = z + beta p
            self.update_p()
            zr_prev = zr_next

            print(f'iter {iteration}, residual={rr}')
            self.paint()
            gui.set_image(self.pixels)
            gui.show()

        ti.kernel_profiler_print()