Exemple #1
0
def test_remove_clear_list_from_fused_serial():
    """Check list-gen and serial task counts for struct-fors around a serial kernel.

    Two sparse fields ``x`` and ``y`` are incremented by struct-for kernels,
    with a purely serial kernel launched in between. The test then asserts
    the kernel-stats counters (4 list-gen tasks, 1 serial task) and the final
    field contents.

    NOTE(review): presumably this runs with async mode enabled by a decorator
    or fixture outside this view -- confirm.
    """
    x = ti.field(ti.i32)
    y = ti.field(ti.i32)
    z = ti.field(ti.i32, shape=())

    n = 32
    # x and y live in two separate pointer -> dense trees under the root.
    ti.root.pointer(ti.i, n).dense(ti.i, 1).place(x)
    ti.root.pointer(ti.i, n).dense(ti.i, 1).place(y)

    # Writes i into x at odd indices and into y at even indices; the other
    # field is not touched at that index.
    @ti.kernel
    def init_xy():
        for i in range(n):
            if i & 1:
                x[i] = i
            else:
                y[i] = i

    init_xy()
    ti.sync()

    # Reset the counters so only the launches below are measured.
    stats = ti.get_kernel_stats()
    stats.clear()

    # Struct-for over the field passed as a template argument; adds 1 to each
    # element it visits.
    @ti.kernel
    def inc(f: ti.template()):
        for i in f:
            f[i] += 1

    # Serial kernel with no loop: z ends up as 42.
    @ti.kernel
    def serial_z():
        z[None] = 40
        z[None] += 2

    # The launch order is what the counter assertions below depend on.
    inc(x)
    inc(y)
    serial_z()
    inc(x)
    inc(y)
    ti.sync()

    counters = stats.get_counters()
    # each of x and y has two listgens: root -> pointer -> dense
    assert int(counters['launched_tasks_list_gen']) == 4
    # clear list tasks have been fused into serial_z
    assert int(counters['launched_tasks_serial']) == 1

    xs = x.to_numpy()
    ys = y.to_numpy()
    # Each initialised cell was incremented twice (i -> i + 2); cells that
    # init_xy never wrote are expected to still read 0.
    for i in range(n):
        if i & 1:
            assert xs[i] == i + 2
            assert ys[i] == 0
        else:
            assert ys[i] == i + 2
            assert xs[i] == 0
Exemple #2
0
def test_listgen_opt_with_offsets():
    """Check that repeated struct-for launches on an offset field need few list-gens.

    A field placed with ``offset=-8`` under a pointer -> dense tree is
    incremented ten times; afterwards the ``launched_tasks_list_gen`` counter
    must not exceed 2.
    """
    x = ti.field(dtype=ti.i32)

    ti.root.pointer(ti.i, 4).dense(ti.i, 4).place(x, offset=-8)

    # Struct-for over x; adds 1 to each element it visits.
    @ti.kernel
    def inc():
        for i in x:
            x[i] += 1

    num_launches = 10
    for _ in range(num_launches):
        inc()

    ti.sync()

    counters = ti.get_kernel_stats().get_counters()
    assert counters['launched_tasks_list_gen'] <= 2
Exemple #3
0
def print_async_stats(include_kernel_profiler=False):
    """Print a short report of Taichi async-benchmark metrics to stdout.

    Args:
        include_kernel_profiler: When true, print the kernel profiler output
            (followed by a blank line) before the metrics table.
    """
    import taichi as ti

    if include_kernel_profiler:
        ti.kernel_profiler_print()
        print()

    counters = ti.get_kernel_stats().get_counters()

    print('=======================')
    print('Async benchmark metrics')
    print('-----------------------')
    print(f'Async mode:           {ti.current_cfg().async_mode}')
    print(f'Kernel time:          {ti.kernel_profiler_total_time():.3f} s')

    # (label, counter key) pairs; each counter is looked up right before its
    # own line is printed, so output order matches the table order.
    counter_rows = (
        ('Tasks launched:       ', 'launched_tasks'),
        ('Instructions emitted: ', 'codegen_statements'),
        ('Tasks compiled:       ', 'codegen_offloaded_tasks'),
    )
    for label, key in counter_rows:
        print(f'{label}{int(counters[key])}')

    print('=======================')
Exemple #4
0
def print_async_stats(include_kernel_profiler=False):
    """Print a short report of Taichi async-benchmark metrics to stdout.

    The "Tasks fused" row is printed only when the ``num_fused_tasks``
    counter is present in the kernel stats.

    Args:
        include_kernel_profiler: When true, print the kernel profile info
            (followed by a blank line) before the metrics table.
    """
    if include_kernel_profiler:
        ti.print_kernel_profile_info()
        print()

    counters = ti.get_kernel_stats().get_counters()

    print('=======================')
    print('Async benchmark metrics')
    print('-----------------------')

    async_mode = ti.current_cfg().async_mode
    print(f'Async mode:           {async_mode}')

    kernel_seconds = ti.kernel_profiler_total_time()
    print(f'Kernel time:          {kernel_seconds:.3f} s')

    launched = int(counters["launched_tasks"])
    print(f'Tasks launched:       {launched}')

    emitted = int(counters["codegen_statements"])
    print(f'Instructions emitted: {emitted}')

    compiled = int(counters["codegen_offloaded_tasks"])
    print(f'Tasks compiled:       {compiled}')

    # Optional counter: not every build/configuration reports it.
    fused_key = 'num_fused_tasks'
    if fused_key in counters:
        fused = int(counters[fused_key])
        print(f'Tasks fused:          {fused}')

    print('=======================')
Exemple #5
0
def test_sfg_dead_store_elimination():
    """Check that dead-store elimination fires in async mode and gradients stay correct.

    Runs a differentiated ``gather`` kernel followed by a ``scatter`` kernel
    five times, then asserts that the ``sfg_dse_tasks`` counter is positive
    and that ``x.grad[i]`` is approximately ``2 * i``.
    """
    ti.init(arch=ti.cpu, async_mode=True)
    n = 32

    x = ti.field(dtype=float, shape=n, needs_grad=True)
    total_energy = ti.field(dtype=float, shape=(), needs_grad=True)
    # Written by scatter() but never read back by this test.
    unused = ti.field(dtype=float, shape=())

    # Accumulates the sum of x[i]**2 into total_energy.
    @ti.kernel
    def gather():
        for i in x:
            e = x[i]**2
            total_energy[None] += e

    # Accumulates the sum of x[i] into the otherwise unused scalar.
    @ti.kernel
    def scatter():
        for i in x:
            unused[None] += x[i]

    # x[i] = i, so d(total_energy)/dx[i] is expected to be 2 * i.
    xnp = np.arange(n, dtype=np.float32)
    x.from_numpy(xnp)
    ti.sync()

    # Reset the counters so only the launches below are measured.
    stats = ti.get_kernel_stats()
    stats.clear()

    for _ in range(5):
        # gather() is recorded on the tape with total_energy as the loss.
        with ti.Tape(total_energy):
            gather()
        scatter()

    ti.sync()
    counters = stats.get_counters()

    # gather() should be DSE'ed
    assert counters['sfg_dse_tasks'] > 0

    x_grad = x.grad.to_numpy()
    for i in range(n):
        assert ti.approx(x_grad[i]) == 2.0 * i