def decorated(*args, **kwargs):
    """Run ``func`` while the runtime is flagged as inside a complex kernel.

    Sets ``inside_complex_kernel`` on the object returned by
    ``impl.get_runtime()``. If that runtime has a truthy ``target_tape``,
    this wrapper and its positional arguments are passed to the tape's
    ``insert`` method. ``func`` is then called with all arguments, and the
    flag is reset to ``False`` afterwards, whether or not an exception was
    raised.

    The return value of ``func`` is discarded; this wrapper returns ``None``.
    """
    impl.get_runtime().inside_complex_kernel = True
    try:
        # The tape insertion lives inside the ``try`` so that a failure
        # there cannot leave ``inside_complex_kernel`` stuck at True.
        tape = impl.get_runtime().target_tape
        if tape:
            # NOTE(review): only positional arguments are handed to the
            # tape; kwargs are not recorded. Confirm that is intended.
            tape.insert(decorated, args)
        func(*args, **kwargs)
    finally:
        # Look the runtime up again instead of reusing an earlier reference,
        # exactly as the original code did.
        impl.get_runtime().inside_complex_kernel = False
def func__(*args, **kwargs):
    """Call ``func`` and require that it raises ``ex``.

    All positional and keyword arguments are forwarded to ``func``.

    Returns:
        ``None`` when ``func`` raises ``ex`` (or a subclass of it).

    Raises:
        AssertionError: if ``func`` raises some other ``Exception``, or if
            it returns without raising at all. Raised explicitly rather
            than through ``assert`` so the check still fires when Python
            runs with ``-O``.
    """
    try:
        func(*args, **kwargs)
    except ex:
        # The expected exception was thrown: the check passes.
        return
    except Exception as err_actual:
        # Chain the unexpected exception so its traceback stays visible.
        raise AssertionError(
            'Exception {} instead of {} thrown'.format(
                str(type(err_actual)), str(ex))) from err_actual
    # Reaching this point means ``func`` returned normally.
    raise AssertionError(
        'Test successfully finished instead of throwing {}'.format(
            str(ex)))
def run_benchmark():
    """Compile, warm up and time ``func(*args)``, recording stats via ``ti``.

    ``func``, ``args`` and ``repeat`` are not defined in this block; they
    are presumably closure variables of an enclosing benchmark helper.

    Steps, in order:

    1. Call ``func`` once followed by ``ti.sync()`` and record the elapsed
       wall time as ``compilation_time``.
    2. Parse the text returned by ``_ti_core.stat()`` line by line and
       forward the recognised counters with ``ti.stat_write``.
    3. Run three warm-up iterations, clear the kernel profiler, then run
       ``repeat`` timed iterations followed by a single ``ti.sync()``.
    4. Record the per-iteration wall time as ``wall_clk_t`` and the
       per-iteration profiler total as ``exec_t``.

    Raises:
        ZeroDivisionError: if ``repeat`` is 0, since the averages divide
            by it.
    """
    # Counter names in the core's stat dump -> names written to the
    # benchmark statistics.
    stat_names = {
        'codegen_kernel_statements': 'compiled_inst',
        'codegen_offloaded_tasks': 'compiled_tasks',
        'launched_tasks': 'launched_tasks',
    }

    # perf_counter is monotonic, so the measured intervals cannot be
    # skewed by system clock adjustments.
    compile_start = time.perf_counter()
    func(*args)  # compile the kernel first
    ti.sync()
    ti.stat_write('compilation_time', time.perf_counter() - compile_start)

    for line in _ti_core.stat().split('\n'):
        fields = line.strip().split(':')
        if len(fields) != 2:
            # Not a single "key: value" pair; ignore the line.
            continue
        stat_name = stat_names.get(fields[0].strip())
        if stat_name is None:
            continue
        try:
            count = int(float(fields[1]))
        except (ValueError, OverflowError):
            # Best effort: a malformed value must not abort the benchmark.
            continue
        ti.stat_write(stat_name, count)

    # Use 3 initial iterations to warm up
    # instruction/data caches. Discussion:
    # https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136
    for _ in range(3):
        func(*args)
        ti.sync()

    ti.kernel_profiler_clear()
    timed_start = time.perf_counter()
    for _ in range(repeat):
        func(*args)
    # Sync once after the loop so the measured interval covers all
    # launched work.
    ti.sync()
    elapsed = time.perf_counter() - timed_start
    ti.stat_write('wall_clk_t', elapsed / repeat)

    device_time = ti.kernel_profiler_total_time()
    ti.stat_write('exec_t', device_time / repeat)
def decorated(*args, **kwargs):
    """Forward every positional and keyword argument to ``func``.

    The value returned by ``func`` is discarded; this wrapper always
    returns ``None``.
    """
    _ = func(*args, **kwargs)
    return None
def test(*args, **kwargs):
    """Initialise ``ti`` for the host architecture, then run ``func``.

    The architecture comes from ``_ti_core.host_arch()`` and is passed to
    ``ti.init`` as ``arch``. ``func`` is then called with all positional
    and keyword arguments. Its return value is discarded.
    """
    host = _ti_core.host_arch()
    ti.init(arch=host)
    func(*args, **kwargs)