Esempio n. 1
0
 def run_benchmark():
     """Time ``func(*args)`` and report the results through ``ti.stat_write``.

     The first call is timed on its own and written as ``compilation_time``.
     Selected counters parsed from ``ti.core.stat()`` are written next, and
     finally the average wall-clock time of ``repeat`` calls is written as
     ``running_time``. ``func``, ``args`` and ``repeat`` are taken from the
     enclosing scope.
     """
     compile_time = time.time()
     func(*args)
     compile_time = time.time() - compile_time
     ti.stat_write('compilation_time', compile_time)

     # Counter name in the ti.core.stat() text -> key it is written under.
     stat_keys = {
         'codegen_kernel_statements': 'instructions',
         'codegen_offloaded_tasks': 'offloaded_tasks',
         'launched_kernels': 'launched_kernels',
     }
     codegen_stat = ti.core.stat()
     for line in codegen_stat.split('\n'):
         try:
             a, b = line.strip().split(':')
         except ValueError:
             # The line is not a single "name:value" pair; skip it.
             # Only ValueError is caught so that KeyboardInterrupt and
             # SystemExit are no longer swallowed here.
             continue
         a = a.strip()
         b = int(float(b))
         if a in stat_keys:
             ti.stat_write(stat_keys[a], b)
     # The reason why we run 4 times is to warm up instruction/data caches.
     # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136
     for i in range(4):
         func(*args)  # compile the kernel first
     ti.sync()
     t = time.time()
     for n in range(repeat):
         func(*args)
     # Sync before reading the clock so the sync itself is inside the timing.
     ti.get_runtime().sync()
     elapsed = time.time() - t
     avg = elapsed / repeat
     ti.stat_write('running_time', avg)
Esempio n. 2
0
def test_2D_bit_array():
    """Fill a 4x8 bit array of 1-bit custom ints with ``(i * N + j) % 2``
    and verify the stored values from a second kernel."""
    rows, cols = 4, 8

    bit_type = ti.type_factory_.get_custom_int_type(1, False)
    bits = ti.field(dtype=bit_type)
    ti.root._bit_array(ti.ij, (rows, cols), num_bits=32).place(bits)
    ti.get_runtime().materialize()

    @ti.kernel
    def fill():
        for r in range(rows):
            for c in range(cols):
                bits[r, c] = (r * cols + c) % 2

    @ti.kernel
    def check():
        for r in range(rows):
            for c in range(cols):
                assert bits[r, c] == (r * cols + c) % 2

    fill()
    check()
Esempio n. 3
0
def test_simple_array():
    """Round-trip values through two quantized int fields (13 and 19 bits)
    placed together in a 32-bit bit_struct, in kernel scope and Python scope."""
    length = 12

    narrow_type = ti.quant.int(13, True)
    wide_type = ti.quant.int(19, False)
    narrow = ti.field(dtype=narrow_type)
    wide = ti.field(dtype=wide_type)

    ti.root.dense(ti.i, length).bit_struct(num_bits=32).place(narrow, wide)
    ti.get_runtime().materialize()

    @ti.kernel
    def write_values():
        for idx in range(length):
            narrow[idx] = -2**idx
            wide[idx] = 2**idx - 1

    @ti.kernel
    def check_values():
        for idx in range(length):
            assert narrow[idx] == -2**idx
            assert wide[idx] == 2**idx - 1

    # Kernel-scope write and read.
    write_values()
    check_values()

    # Python-scope write and read of the bit_struct SNode: call the wrapped,
    # untranslated function bodies directly.
    write_values.__wrapped__()
    check_values.__wrapped__()
Esempio n. 4
0
def test_simple_array():
    """Round-trip values through a 13-bit signed and a 19-bit unsigned custom
    int field placed together in a 32-bit ``_bit_struct`` on the CPU backend."""
    ti.init(arch=ti.cpu, debug=True, print_ir=True, cfg_optimization=False)

    length = 12

    signed13 = ti.type_factory_.get_custom_int_type(13, True)
    unsigned19 = ti.type_factory_.get_custom_int_type(19, False)
    narrow = ti.field(dtype=signed13)
    wide = ti.field(dtype=unsigned19)

    ti.root.dense(ti.i, length)._bit_struct(num_bits=32).place(narrow, wide)
    ti.get_runtime().materialize()

    @ti.kernel
    def write_values():
        for idx in range(length):
            narrow[idx] = -2**idx
            wide[idx] = 2**idx - 1

    @ti.kernel
    def check_values():
        for idx in range(length):
            assert narrow[idx] == -2**idx
            assert wide[idx] == 2**idx - 1

    write_values()
    check_values()
def test_unordered():
    """Check dtype, shape, parent chain, children and string form of a field
    placed under dense nodes created in the axis order k, i, j."""
    val = ti.field(ti.i32)

    n, m, p = 3, 7, 11

    outer = ti.root.dense(ti.k, n)
    middle = outer.dense(ti.i, m)
    inner = middle.dense(ti.j, p)
    inner.place(val)

    assert val.dtype == ti.i32
    assert val.shape == (m, p, n)

    # parent(0) is the node itself; parent() and parent(1) are both the
    # direct parent, and higher arguments walk further up to ti.root.
    assert val.snode.parent(0) == val.snode
    assert val.snode.parent() == inner
    for depth, ancestor in enumerate((inner, middle, outer, ti.root),
                                     start=1):
        assert val.snode.parent(depth) == ancestor

    assert val.snode in inner.get_children()
    assert inner in middle.get_children()
    assert middle in outer.get_children()
    ti.get_runtime().materialize()
    assert outer in ti.FieldsBuilder.finalized_roots()[0].get_children()

    expected_str = (f'ti.root => dense {[n]} => dense {[m, n]}'
                    f' => dense {[m, p, n]} => place {[m, p, n]}')
    assert str(val.snode) == expected_str
Esempio n. 6
0
def test_clear_all_gradients():
    """Set non-zero gradients, clear them with ``ti.clear_all_gradients()``,
    and check both the zeroed values and the compiled-function count."""
    x, y, z, w = (ti.var(ti.f32) for _ in range(4))

    n = 128

    ti.root.place(x)
    ti.root.dense(ti.i, n).place(y)
    ti.root.dense(ti.i, n).dense(ti.j, n).place(z, w)
    ti.root.lazy_grad()

    def compiled_count():
        return ti.get_runtime().get_num_compiled_functions()

    x.grad[None] = 3
    for row in range(n):
        y.grad[row] = 3
        for col in range(n):
            z.grad[row, col] = 5
            w.grad[row, col] = 6

    ti.clear_all_gradients()
    assert compiled_count() == 3

    assert x.grad[None] == 0
    for row in range(n):
        assert y.grad[row] == 0
        for col in range(n):
            assert z.grad[row, col] == 0
            assert w.grad[row, col] == 0

    # A second clear must not compile any further kernels.
    ti.clear_all_gradients()
    assert compiled_count() == 3
Esempio n. 7
0
def test_fused_kernels():
    """Check that ``to_torch`` and ``from_torch`` on a matrix each raise the
    compiled-function count by exactly one."""
    def compiled_count():
        return ti.get_runtime().get_num_compiled_functions()

    size = 12
    mat = ti.Matrix(3, 2, ti.f32, shape=(size, size, size))
    baseline = compiled_count()
    tensor = mat.to_torch()
    assert compiled_count() == baseline + 1
    mat.from_torch(tensor)
    assert compiled_count() == baseline + 2
Esempio n. 8
0
def test_matrix_field_dynamic_index_different_path_length():
    """``dynamic_index_stride`` is None when one component is placed under a
    single dense node and the other under two nested dense nodes."""
    vec = ti.Vector.field(2, ti.i32)
    first, second = (vec.get_scalar_field(k) for k in range(2))

    ti.root.dense(ti.i, 8).place(first)
    ti.root.dense(ti.i, 2).dense(ti.i, 4).place(second)

    ti.get_runtime().materialize()
    assert vec.dynamic_index_stride is None
Esempio n. 9
0
def test_matrix_field_dynamic_index_not_pure_dense():
    """``dynamic_index_stride`` is None when one component is placed under a
    pointer node while the other sits under dense nodes only."""
    vec = ti.Vector.field(2, ti.i32)
    first, second = (vec.get_scalar_field(k) for k in range(2))

    ti.root.dense(ti.i, 2).pointer(ti.i, 4).place(first)
    ti.root.dense(ti.i, 2).dense(ti.i, 4).place(second)

    ti.get_runtime().materialize()
    assert vec.dynamic_index_stride is None
Esempio n. 10
0
def test_div_default_ip():
    """With ``default_ip`` set to i64, ``(1e15 + 1e9) // 1e10`` computed in a
    kernel must be stored as 100000."""
    ti.get_runtime().set_default_ip(ti.i64)
    result = ti.field(ti.f32, shape=())

    @ti.kernel
    def floor_div():
        numerator = 1e15 + 1e9
        result[None] = numerator // 1e10

    floor_div()
    assert result[None] == 100000
Esempio n. 11
0
def benchmark(func, repeat=100, args=()):
    """Return the average wall-clock time, in seconds, of one ``func(*args)`` call.

    Args:
        func: Callable to measure.
        repeat: Number of timed calls.
        args: Positional arguments passed to ``func`` on every call.

    Returns:
        Elapsed seconds of the timed loop divided by ``repeat``.
    """
    import taichi as ti
    import time
    func(*args)  # compile the kernel first
    ti.sync()
    t = time.time()
    for n in range(repeat):
        func(*args)
    # Sync BEFORE reading the clock. Reading it first (as this function used
    # to) closes the timed window before the sync, leaving out anything the
    # sync still has to wait for.
    ti.get_runtime().sync()
    elapsed = time.time() - t
    return elapsed / repeat
Esempio n. 12
0
def test_matrix_field_dynamic_index_different_stride():
    """``dynamic_index_stride`` is None when an unrelated field is placed
    between the second and third components of the vector field."""
    filler = ti.field(ti.f32)

    vec = ti.Vector.field(3, ti.i32)
    comp0, comp1, comp2 = (vec.get_scalar_field(k) for k in range(3))

    ti.root.dense(ti.i, 8).place(comp0, comp1, filler, comp2)

    ti.get_runtime().materialize()
    assert vec.dynamic_index_stride is None
Esempio n. 13
0
def test_matrix_field_dynamic_index_different_offset_bytes_in_parent_cell():
    """``dynamic_index_stride`` is None when the two components are placed at
    different positions inside their respective parent cells."""
    filler_before = ti.field(ti.f32)
    filler_after = ti.field(ti.f32)

    vec = ti.Vector.field(2, ti.i32)
    first, second = (vec.get_scalar_field(k) for k in range(2))

    # first comes after a filler, second comes before one.
    ti.root.dense(ti.i, 8).place(filler_before, first)
    ti.root.dense(ti.i, 8).place(second, filler_after)

    ti.get_runtime().materialize()
    assert vec.dynamic_index_stride is None
Esempio n. 14
0
 def cast(self, dt):
     """Return a copy of this object with every entry cast to ``dt``.

     The Python types ``float`` and ``int`` are replaced by the runtime's
     ``default_fp`` and ``default_ip`` respectively; any other plain Python
     numeric type trips an assertion.
     """
     ret = self.copy()
     is_python_number_type = type(dt) is type and issubclass(
         dt, numbers.Number)
     if is_python_number_type:
         import taichi as ti
         if dt is int:
             dt = ti.get_runtime().default_ip
         elif dt is float:
             dt = ti.get_runtime().default_fp
         else:
             assert False
     for idx, _ in enumerate(self.entries):
         ret.entries[idx] = impl.cast(ret.entries[idx], dt)
     return ret
Esempio n. 15
0
        def test(*func_args, **func_kwargs):
            """Run ``func`` once on every supported arch accepted by the arch
            checkers, calling ``ti.init`` for that arch before each run."""
            import taichi as ti
            can_run_on = func_kwargs.pop(_tests_arch_checkers_argname,
                                         _ArchCheckers())

            def supports_data64(arch):
                return is_supported(arch, extension.data64)

            # Filter away archs that don't support 64-bit data.
            wanted_fp = func_kwargs.get('default_fp',
                                        ti.get_runtime().default_fp)
            wanted_ip = func_kwargs.get('default_ip',
                                        ti.get_runtime().default_ip)
            needs_data64 = wanted_fp == ti.f64 or wanted_ip == ti.i64
            if needs_data64:
                can_run_on.register(supports_data64)

            for arch in ti.supported_archs():
                if not can_run_on(arch):
                    continue
                ti.init(arch=arch, **kwargs)
                func(*func_args, **func_kwargs)
Esempio n. 16
0
def test_indices():
    """Check ``physical_index_position()`` for a field declared with a shape
    and for a field placed under dense nodes in j-then-i order."""
    shaped = ti.var(ti.f32, shape=(128, 32, 8))

    swapped = ti.var(ti.f32)
    ti.root.dense(ti.j, 32).dense(ti.i, 16).place(swapped)

    ti.get_runtime().materialize()

    assert shaped.snode().physical_index_position() == {0: 0, 1: 1, 2: 2}
    assert swapped.snode().physical_index_position() == {0: 1, 1: 0}
Esempio n. 17
0
def benchmark(func, repeat=300, args=()):
    """Measure the average time of ``func(*args)`` over ``repeat`` calls and
    pass it, in milliseconds, to ``ti.stat_write``."""
    import taichi as ti
    import time
    # Run 4 times up front: the first call compiles the kernel and the rest
    # warm up instruction/data caches.
    # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136
    warmup_runs = 4
    for _ in range(warmup_runs):
        func(*args)
    ti.sync()
    start = time.time()
    for _ in range(repeat):
        func(*args)
    ti.get_runtime().sync()
    avg_ms = (time.time() - start) / repeat * 1000  # milliseconds
    ti.stat_write(avg_ms)
Esempio n. 18
0
 def custom_int(self, bits, signed=True, compute_type=None):
     """Return ``self.core.get_custom_int_type(bits, signed, compute_type)``.

     ``compute_type`` defaults to the runtime's ``default_ip``; a
     ``ti.core.DataType`` value is converted with ``get_ptr()`` first.
     """
     import taichi as ti
     resolved = (ti.get_runtime().default_ip
                 if compute_type is None else compute_type)
     if isinstance(resolved, ti.core.DataType):
         resolved = resolved.get_ptr()
     return self.core.get_custom_int_type(bits, signed, resolved)
Esempio n. 19
0
def main():
    """Load the initial smoke and target images, run ``num_iterations``
    optimization steps, then run a final ``forward("output")``."""
    print("Loading initial and target states...")
    initial_smoke_img = cv2.imread("init_smoke.png")[:, :, 0] / 255.0
    resized_target = cv2.resize(cv2.imread('taichi.png'), (n_grid, n_grid))
    target_img = resized_target[:, :, 0] / 255.0

    # Copy channel 0 of both images (scaled by 1/255) into the fields.
    for row in range(n_grid):
        for col in range(n_grid):
            target[row, col] = target_img[row, col]
            smoke[0, row, col] = initial_smoke_img[row, col]

    for opt in range(num_iterations):
        start = time.time()
        with ti.Tape(loss):
            # NOTE: opt is non-negative, so opt % 10 is never -1 and
            # output is always None here.
            if opt % 10 == -1:
                output = "test"
            else:
                output = None
            forward(output)
        elapsed_ms = (time.time() - start) * 1000
        print('total time', elapsed_ms, 'ms')

        print('Iter', opt, ' Loss =', loss[None])
        apply_grad()
        compile_seconds = ti.get_runtime().prog.get_total_compilation_time()
        print("Compilation time:", compile_seconds)
        # ti.profiler_print()

    forward("output")
Esempio n. 20
0
def test_default_ip_ndarray(dtype):
    """A vector ndarray created with Python ``int`` must take the
    ``default_ip`` that was passed to ``ti.init``."""
    init_options = dict(arch=supported_archs_taichi_ndarray,
                        default_ip=dtype,
                        ndarray_use_torch=False)
    ti.init(**init_options)

    vec = ti.Vector.ndarray(2, int, ())

    assert vec.dtype == ti.get_runtime().default_ip
Esempio n. 21
0
def init(default_fp=None,
         default_ip=None,
         print_preprocessed=None,
         debug=None,
         **kwargs):
    """Reset Taichi, apply the given settings and create the program.

    ``debug`` falls back to the ``TI_DEBUG`` environment variable when it is
    None. A non-empty ``TI_LOG_LEVEL`` environment variable sets the logging
    level. Every remaining keyword argument is assigned as an attribute on
    ``ti.cfg``.
    """
    if debug is None:
        debug = bool(int(os.environ.get('TI_DEBUG', '0')))

    # Deep-copy the arguments first: they may refer to items of ti.cfg, which
    # are references, and ti.reset() below could otherwise overwrite them
    # with their default values.
    default_fp = _deepcopy(default_fp)
    default_ip = _deepcopy(default_ip)
    kwargs = _deepcopy(kwargs)

    import taichi as ti
    ti.reset()

    if default_fp is not None:
        ti.get_runtime().set_default_fp(default_fp)
    if default_ip is not None:
        ti.get_runtime().set_default_ip(default_ip)
    if print_preprocessed is not None:
        ti.get_runtime().print_preprocessed = print_preprocessed

    if debug:
        ti.set_logging_level(ti.DEBUG)
    ti.cfg.debug = debug

    # Applied after the debug level, so TI_LOG_LEVEL takes precedence.
    env_log_level = os.environ.get('TI_LOG_LEVEL', '')
    if env_log_level:
        ti.set_logging_level(env_log_level)

    for option, value in kwargs.items():
        setattr(ti.cfg, option, value)
    ti.get_runtime().create_program()
Esempio n. 22
0
def _test_inconsistent_trailing_bits():
    """Materializing must raise RuntimeError when two dense children of the
    same pointer node use different sizes along the same axis."""
    ti.init(arch=ti.cpu, debug=True, print_ir=True)

    x, y, z = (ti.field(ti.f32) for _ in range(3))

    parent = ti.root.pointer(ti.i, 8)

    # x and z end up with inconsistent numbers of bits along i (32 vs 16),
    # which leads to the RuntimeError below.
    parent.dense(ti.i, 32).place(x)
    parent.dense(ti.i, 16).place(z)

    parent.dense(ti.j, 16).place(y)

    with pytest.raises(RuntimeError):
        ti.get_runtime().materialize()
Esempio n. 23
0
    def decorated(*args, _gradient=False, **kwargs):
        """Call ``adjoint`` when ``_gradient`` is true, otherwise ``primal``,
        then insert this call into the runtime's target tape if one is set
        and the runtime is not inside a complex kernel."""
        selected = adjoint if _gradient else primal
        selected(*args, **kwargs)

        import taichi as ti
        runtime = ti.get_runtime()
        should_record = (runtime.target_tape
                         and not runtime.inside_complex_kernel)
        if should_record:
            runtime.target_tape.insert(decorated, args)
Esempio n. 24
0
 def fixed(frac, signed=True, range=1.0, compute=None):
     """Build a custom float type from a ``frac``-bit int type and a scale.

     The scale is ``range / 2**(frac - 1)`` when ``signed`` is true and
     ``range / 2**frac`` otherwise. ``compute`` defaults to the runtime's
     ``default_fp``.
     """
     import taichi as ti
     # TODO: handle cases with frac > 32
     frac_type = Quant.int(bits=frac, signed=signed, compute=ti.i32)
     scale_bits = frac - 1 if signed else frac
     scale = range / 2**scale_bits
     compute_type = ti.get_runtime().default_fp if compute is None else compute
     return ti.type_factory.custom_float(frac_type, None, compute_type, scale)
Esempio n. 25
0
 def float(exp, frac, signed=True, compute=None):
     """Build a custom float type from an ``exp``-bit exponent and a
     ``frac``-bit significand; ``compute`` defaults to the runtime's
     ``default_fp``."""
     import taichi as ti
     # Exponent is always unsigned
     exponent = Quant.int(bits=exp, signed=False, compute=ti.i32)
     # TODO: handle cases with frac > 32
     significand = Quant.int(bits=frac, signed=signed, compute=ti.i32)
     compute_type = ti.get_runtime().default_fp if compute is None else compute
     return ti.type_factory.custom_float(significand_type=significand,
                                         exponent_type=exponent,
                                         compute_type=compute_type)
Esempio n. 26
0
def _test_compiled_functions():
    """Call one kernel with four different array arguments and check the
    compiled-function count after each call."""
    @ti.kernel
    def func(arr: ti.any_arr(element_dim=1)):
        for i in range(5):
            for j in range(4):
                arr[i][j * j] = j * j

    def compiled_count():
        return ti.get_runtime().get_num_compiled_functions()

    taichi_vec = ti.Vector.ndarray(10, ti.i32, 5)
    func(taichi_vec)
    assert compiled_count() == 1

    # The (6, 10) numpy array leaves the count at 1.
    numpy_arr = np.zeros((6, 10), dtype=np.int32)
    func(numpy_arr)
    assert compiled_count() == 1

    # The (6, 11) torch tensor raises the count to 2.
    import torch
    torch_arr = torch.zeros((6, 11), dtype=torch.int32)
    func(torch_arr)
    assert compiled_count() == 2

    # The SOA-layout ndarray raises the count to 3.
    soa_vec = ti.Vector.ndarray(10, ti.i32, 5, layout=ti.Layout.SOA)
    func(soa_vec)
    assert compiled_count() == 3
Esempio n. 27
0
def test_random_int():
    """For i32 and i64, shift and scale ``ti.random`` integers by the type's
    range and check that the k-th power of the samples has a mean close to
    ``1 / (k + 1)`` for k = 0..3."""
    for precision in (ti.i32, ti.i64):
        ti.init()
        n = 1024
        samples_field = ti.var(ti.f32, shape=(n, n))
        ti.get_runtime().set_default_fp(ti.f64)

        @ti.kernel
        def fill():
            for i in range(n):
                for j in range(n):
                    v = ti.random(precision)
                    if precision == ti.i32:
                        samples_field[i, j] = (float(v) +
                                               float(2**31)) / float(2**32)
                    else:
                        samples_field[i, j] = (float(v) +
                                               float(2**63)) / float(2**64)

        fill()
        samples = samples_field.to_numpy()
        for power in range(4):
            expected_mean = 1 / (power + 1)
            assert (samples**power).mean() == approx(expected_mean, rel=1e-2)
Esempio n. 28
0
 def custom_float(self,
                  significand_type,
                  exponent_type=None,
                  compute_type=None,
                  scale=1.0):
     """Return ``self.core.get_custom_float_type(...)`` for the given types.

     When ``compute_type`` is None, ``get_ptr()`` of the runtime's
     ``default_fp`` is used instead.
     """
     import taichi as ti
     if compute_type is None:
         default_fp = ti.get_runtime().default_fp
         compute_type = default_fp.get_ptr()
     make_type = self.core.get_custom_float_type
     return make_type(significand_type,
                      exponent_type,
                      compute_type,
                      scale=scale)
Esempio n. 29
0
def benchmark_flat_range():
    """Return the average wall-clock seconds of 100 runs of a kernel that
    writes 2.0 to every cell of a dense (N * 8) x (N * 8) f32 field."""
    grid = ti.var(dt=ti.f32)
    N = 512
    num_runs = 100

    @ti.layout
    def place():
        ti.root.dense(ti.ij, N * 8).place(grid)

    @ti.kernel
    def fill():
        for j in range(N * 8):
            for i in range(N * 8):
                grid[i, j] = 2.0

    ti.get_runtime().sync()
    start = time.time()
    for _ in range(num_runs):
        fill()
    ti.get_runtime().sync()
    return (time.time() - start) / num_runs
Esempio n. 30
0
def test_assert():
    """Disabled test for an ``assert`` statement inside a kernel.

    The unconditional ``return`` below makes the rest of the body
    unreachable, so this test currently does nothing.
    """
    # NOTE(review): early return disables the test; the reason is not stated
    # here. Confirm before re-enabling.
    return
    ti.get_runtime().print_preprocessed = True
    ti.cfg.print_ir = True
    # ti.cfg.arch = ti.cuda

    @ti.kernel
    def func():
        x = 20
        # x == 20 does not satisfy 10 <= x < 20.
        assert 10 <= x < 20

    func()