def model():
    with jt.var_scope('a'):
        assert jt.current_scope.full_name == "model/a_0/"
    with jt.var_scope('b'):
        with jt.var_scope('b'):
            assert jt.current_scope.full_name == "model/b_0/b_0/"
    with jt.var_scope('c'):
        assert jt.current_scope.full_name == "model/c_0/"
def test_print_trace(self):
    jt.print_trace()
    # force use addr2line
    jt.flags.gdb_path = ""
    with jt.var_scope(gdb_path=""):
        jt.print_trace()
def test_log_capture(self):
    LOG.log_capture_start()
    with jt.var_scope(log_v=1000, log_vprefix=""):
        LOG.v("1")
        LOG.vv("2")
        LOG.i("3")
        LOG.w("4")
        LOG.e("5")
        a = jt.zeros([10])
        a.sync()
    LOG.log_capture_stop()
    # TODO: why need manually delete this variable?
    del a
    logs = LOG.log_capture_read()
    logs2 = LOG.log_capture_read()
    assert len(logs2) == 0
    for i in range(5):
        assert logs[i]['msg'] == str(i + 1)
        assert logs[i]['level'] == 'iiiwe'[i]
        assert logs[i]['name'] == 'test_log.py'
    finished_log = [
        l["msg"] for l in logs
        if l["name"] == "executor.cc" and "return vars:" in l["msg"]
    ]
    assert len(finished_log) == 1 and "[10,]" in finished_log[0]
def test_no_cuda_op(self):
    no_cuda_op = jt.compile_custom_op("""
    struct NoCudaOp : Op {
        Var* output;
        NoCudaOp(NanoVector shape, string dtype="float");
        const char* name() const override { return "my_cuda"; }
        DECLARE_jit_run;
    };
    """, """
    #ifndef JIT
    NoCudaOp::NoCudaOp(NanoVector shape, string dtype) {
        flags.set(NodeFlags::_cpu);
        output = create_output(shape, dtype);
    }

    void NoCudaOp::jit_prepare() {
        add_jit_define("T", output->dtype());
    }

    #else // JIT
    void NoCudaOp::jit_run() {}
    #endif // JIT
    """,
    "no_cuda")
    # force use cuda
    with jt.var_scope(use_cuda=2):
        a = no_cuda_op([3, 4, 5], 'float')
        expect_error(lambda: a())
def check(xshape, wshape, stride, pad):
    a = np.random.rand(*xshape).astype(np.float32)
    b = np.random.rand(*wshape).astype(np.float32)
    c = jt.mkl_ops.mkl_conv(a, b, stride, pad, 1, xformat="acdb", wformat="hwio").data
    a_jt = jt.array(a)
    b_jt = jt.array(b)
    with jt.var_scope(enable_tuner=0, compile_options={"test_mkl_conv": uid[0]}):
        c_jt = conv_nhwc_hwio(a_jt, b_jt, stride, pad).data
    with jt.log_capture_scope(
        enable_tuner=1,
        compile_options={"test_mkl_conv": uid[0] + 1},
        log_v=0,
        log_vprefix="tuner_manager=100,conv_tuner=1000",
    ) as raw_logs:
        c_jt_tune = conv_nhwc_hwio(a_jt, b_jt, stride, pad).data
    uid[0] += 2
    assert np.max(c_jt - c) < 1e-4 and np.max(c_jt_tune - c) < 1e-4
    logs = find_log_with_re(
        raw_logs,
        "Run tuner conv: confidence\\((.*)\\) candidates\\((.*)\\)$")
    assert len(logs) == 1, raw_logs
    assert logs[0][0] == '20'
    assert simple_parser(logs[0][1]) == {'relay0': [1, 0]}
def test_forward(self):
    a = np.random.rand(1, 3, 224, 224).astype(np.float32)
    b = np.random.rand(64, 3, 7, 7).astype(np.float32)
    c = jt.mkl_ops.mkl_conv(a, b, 2, 3).data
    a_jt = jt.array(a)
    b_jt = jt.array(b)
    with jt.var_scope(enable_tuner=0, compile_options={"test_mkl_conv": 1}):
        c_jt = conv(a_jt, b_jt, 3, 2).data
    with jt.log_capture_scope(
        enable_tuner=1,
        compile_options={"test_mkl_conv": 2},
        log_v=0,
        log_vprefix="tuner_manager=100,conv_tuner=1000",
    ) as raw_logs:
        c_jt_tune = conv(a_jt, b_jt, 3, 2).data
    assert np.max(c_jt - c) < 1e-4 and np.max(c_jt_tune - c) < 1e-4
    logs = find_log_with_re(
        raw_logs,
        "Run tuner conv: confidence\\((.*)\\) candidates\\((.*)\\)$")
    assert len(logs) == 1
    assert logs[0][0] == '20'
    assert simple_parser(logs[0][1]) == {'relay0': [1, 0]}
def adam(model, loss, lr=3e-4, betas=[0.9, 0.999], eps=1e-8):
    ps = jt.find_vars(model)
    gs = jt.grad(loss, ps)
    with jt.var_scope('_'.join([model, 'adam']), unique=True):
        adam_step = jt.make_var([1], init=jt.zeros)
        adam_step += 1
        for p, g in zip(ps, gs):
            m = jt.make_var(p.shape, init=jt.zeros)
            v = jt.make_var(p.shape, init=jt.zeros)
            # update biased first and second moment estimates
            m.assign(betas[0] * m + (1 - betas[0]) * g)
            v.assign(betas[1] * v + (1 - betas[1]) * g * g)
            # bias-corrected step size, then apply the Adam update
            step_size = lr * jt.sqrt(1 - betas[1] ** adam_step) / (1 - betas[0] ** adam_step)
            p -= m * step_size / (jt.sqrt(v) + eps)
def test(self):
    a = jt.array([1, 2, 3])
    a.sync()
    assert a.compile_options == {}
    a.compile_options = {"compile_shapes": 1}
    assert a.compile_options == {"compile_shapes": 1}
    b = a + a
    assert b.compile_options == {}
    with jt.var_scope(compile_options={"compile_shapes": 1}):
        c = a + b
    assert c.compile_options == {"compile_shapes": 1}
    with jt.profile_scope() as report:
        c.sync()
    assert len(report) == 2 and "compile_shapes:1" in report[1][0]
def test_cuda_custom_op(self):
    my_op = jt.compile_custom_op("""
    struct MyCudaOp : Op {
        Var* output;
        MyCudaOp(NanoVector shape, string dtype="float");
        const char* name() const override { return "my_cuda"; }
        DECLARE_jit_run;
    };
    """, """
    #ifndef JIT
    MyCudaOp::MyCudaOp(NanoVector shape, string dtype) {
        flags.set(NodeFlags::_cuda);
        output = create_output(shape, dtype);
    }

    void MyCudaOp::jit_prepare() {
        add_jit_define("T", output->dtype());
    }

    #else // JIT
    #ifdef JIT_cuda

    __global__ void kernel(index_t n, T *x) {
        int index = blockIdx.x * blockDim.x + threadIdx.x;
        int stride = blockDim.x * gridDim.x;
        for (int i = index; i < n; i += stride)
            x[i] = (T)-i;
    }

    void MyCudaOp::jit_run() {
        index_t num = output->num;
        auto* __restrict__ x = output->ptr<T>();
        int blockSize = 256;
        int numBlocks = (num + blockSize - 1) / blockSize;
        kernel<<<numBlocks, blockSize>>>(num, x);
    }

    #endif // JIT_cuda
    #endif // JIT
    """,
    "my_cuda")
    with jt.var_scope(use_cuda=1):
        a = my_op([3, 4, 5], 'float')
        na = a.data
    assert a.shape == [3, 4, 5] and a.dtype == 'float'
    assert (-na.flatten() == range(3 * 4 * 5)).all(), na
def test_backward_nhwc_hwio(self):
    n, c, H, W = 2, 3, 5, 5
    o, i, h, w = 4, c, 3, 3
    a = np.random.rand(n, H, W, c).astype(np.float32)
    b = np.random.rand(h, w, i, o).astype(np.float32)
    da = np.random.rand(n, H, W, o).astype(np.float32)
    dx = jt.mkl_ops.mkl_conv_backward_x(b, da, H, W, 1, 1, 1, "acdb", "hwio", "acdb").data
    dw = jt.mkl_ops.mkl_conv_backward_w(a, da, h, 1, 1, 1, "acdb", "hwio", "acdb").data
    a_jt = jt.array(a)
    b_jt = jt.array(b)
    with jt.var_scope(enable_tuner=0, compile_options={"test_mkl_conv": 1}):
        c_jt = conv_nhwc_hwio(a_jt, b_jt, 1, 1) * da
        gs = jt.grad(c_jt, [a_jt, b_jt])
        gs.append(c_jt)
        jt.fetch_sync(gs)
        dx_jt = gs[0].data
        dw_jt = gs[1].data
    with jt.log_capture_scope(
        log_v=10,
        log_vprefix="tuner_manager=100,var_relay=100",
        enable_tuner=1,
        compile_options={"test_mkl_conv": 2},
    ) as rawlogs:
        gs_tune = jt.grad(c_jt, [a_jt, b_jt])
        jt.fetch_sync(gs_tune)
        dx_jt_tune = gs_tune[0].data
        dw_jt_tune = gs_tune[1].data
    logs = find_log_with_re(
        rawlogs,
        "Run tuner conv: confidence\\((20)\\) candidates\\((.*)\\)$")
    assert len(logs) == 1
    assert logs[0][0] == "20", "confidence of reorder should be 20"
    candidates = simple_parser(logs[0][1])
    assert candidates == {"relay0": [1, 0], "relay1": [1, 0]}, candidates
    logs = find_log_with_re(rawlogs, r"get_relay_src([\s\S]*)")
    assert len(logs) == 2
    assert "@relay_op" in logs[0]
    assert "@relay_op" in logs[1]
    assert np.max(dx_jt_tune - dx) < 1e-5 and np.max(dw_jt_tune - dw) < 1e-5
    assert np.max(dx_jt - dx) < 1e-5 and np.max(dw_jt - dw) < 1e-5
def performance_test_scope(warmup=0, rerun=0, **args):
    """ profile scope
    example:
        with jt.profile_scope() as report:
            ......
        print(report)
    """
    assert not jt.flags.profiler_enable
    if skip_slow_test:
        jt.profiler.start(0, 0)
    else:
        jt.profiler.start(warmup, rerun)
    report = []
    try:
        with jt.var_scope(**args):
            yield report
    finally:
        jt.profiler.stop()
        if skip_slow_test:
            report.extend([[1e30]] * 3)
        else:
            report.extend(jt.profiler.report())
def test_cuda_flags(self):
    with jt.var_scope(use_cuda=1):
        a = jt.random((10, 10))
        a.sync()
def test_scope(self):
    prev = jt.flags.log_v
    with jt.var_scope(log_v=1):
        assert jt.flags.log_v == 1
    assert jt.flags.log_v == prev