def test_var(self):
    jt.clean()
    for i in range(2):
        x = jt.array([[1]])
        y = model(x)
        params = jt.find_vars()
        assert len(params) == 3
        names = [p.name() for p in params]
        assert names == [
            "model/linear_0/var_0",
            "model/linear_1/var_0",
            "model/linear_2/var_0",
        ], str(names)
        jt.find_var("model/linear_0/var_0")
        # looking up a scope prefix with find_var (singular) must fail;
        # only find_vars accepts a prefix
        expect_error(lambda: jt.find_var("model/linear_0"))
        expect_error(lambda: jt.find_var("model/linear"))
        assert len(jt.find_vars("model/linear_0/var_0")) == 1
        assert len(jt.find_vars("model/linear_0/")) == 1
        assert len(jt.find_vars("model/")) == 3
    jt.clean()
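
# For context: the test above assumes a `model` built from three `linear`
# calls under nested var_scopes, each layer creating exactly one variable
# (hence the names ".../linear_i/var_0"). The sketch below is a minimal,
# hypothetical reconstruction: the layer widths, the zero init, and the
# broadcast-based matmul helper are assumptions, not the test's real model.
def matmul(a, b):
    # [n,m] x [m,k] -> [n,k] via broadcast-and-reduce
    (n, m), k = a.shape, b.shape[-1]
    a = a.broadcast([n, m, k], dims=[2])
    b = b.broadcast([n, m, k], dims=[0])
    return (a * b).sum(dim=1)

@jt.var_scope('linear')
def linear(x, n):
    # a single make_var per layer -> one "var_0" per linear scope
    w = jt.make_var([x.shape[-1], n], init=jt.zeros)
    return matmul(x, w)

@jt.var_scope('model', unique=True)
def model(x):
    x = linear(x, 10)
    x = linear(x, 10)
    return linear(x, 1)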
def adam(model, loss, lr=3e-4, betas=(0.9, 0.999), eps=1e-8):
    ps = jt.find_vars(model)
    gs = jt.grad(loss, ps)
    with jt.var_scope('_'.join([model, 'adam']), unique=True):
        adam_step = jt.make_var([1], init=jt.zeros)
        adam_step += 1
        for p, g in zip(ps, gs):
            m = jt.make_var(p.shape, init=jt.zeros)  # first-moment estimate
            v = jt.make_var(p.shape, init=jt.zeros)  # second-moment estimate
            m.assign(betas[0] * m + (1 - betas[0]) * g)
            v.assign(betas[1] * v + (1 - betas[1]) * g * g)
            # bias-corrected step size
            step_size = lr * jt.sqrt(1 - betas[1] ** adam_step) / (1 - betas[0] ** adam_step)
            p -= m * step_size / (jt.sqrt(v) + eps)
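
# Hedged usage sketch for adam(): note that `model` here is the var_scope
# *name* (a string passed to jt.find_vars), not a module object.
# `model_fn` and `training_data` below are hypothetical stand-ins.
for x, y in training_data:
    pred_y = model_fn(x)                 # forward pass under the 'model' scope
    loss_mean = ((pred_y - y) ** 2).mean()
    adam('model', loss_mean, lr=3e-4)    # one Adam update of all 'model' vars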
def test_longest_dis_fuse(self):
    x = jt.array(np.random.rand(1, 3, 224, 224).astype(np.float32))
    loss = jt.sum(resnet_fake(x))
    ps = jt.find_vars('resnet_fake')
    gs = jt.grad(loss, ps)
    jt.sync(gs)
    # assert that no big intermediate tensor was allocated
    g = jt.dump_all_graphs()
    for s in g.nodes_info:
        if not s.startswith("Var"):
            continue
        shape = s.split("[")[1].split("]")[0].split(",")
        ptr = s.split("(")[1].split(")")[0].split(",")[-1]
        if ptr != '0':  # a zero pointer means the var was fused away, never allocated
            assert len(shape) <= 5, s
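
# What the string surgery above relies on: each Var entry in nodes_info
# carries its shape in brackets and its data pointer as the last
# comma-separated field inside the parentheses. A hypothetical helper and
# an illustrative (not version-exact) node string:
def parse_var_node(s):
    shape_fields = s.split("[")[1].split("]")[0].split(",")
    ptr = s.split("(")[1].split(")")[0].split(",")[-1]
    return shape_fields, ptr

shape_fields, ptr = parse_var_node("Var(42:i1:o1,float32,,0)[1,3,224,224,]")
assert ptr == "0"               # zero pointer: this var was never allocated
assert len(shape_fields) <= 5   # 4 dims plus the trailing empty field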
batch_size = 50
base_lr = 0.05
# stop the gradient of this global value to prevent a memory leak
lr = f32(base_lr).name("lr").stop_grad()

def get_data(n):
    for i in range(n):
        x = np.random.rand(batch_size, 1)
        y = x * x
        yield np.float32(x), np.float32(y)

for i, (x, y) in enumerate(get_data(n)):
    pred_y = model(x).name("pred_y")
    loss = ((pred_y - y) ** f32(2)).name("loss")
    loss_mean = loss.mean()

    ps = jt.find_vars('model')
    gs = jt.grad(loss_mean, ps)
    for p, g in zip(ps, gs):
        p -= g * lr  # plain SGD update: p <- p - lr * grad

    if i > 2:
        assert prev == jt.liveness_info(), f"memory leak {prev} {jt.liveness_info()}"
    prev = jt.liveness_info()
    print(f"step {i}, loss = {loss_mean.data.sum()}")

result = 0.0009948202641680837
assert abs(loss_mean.data - result) < 1e-6
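
# The leak check above relies on a simple invariant: once `lr` is detached
# with stop_grad, every step builds and frees the same graph, so the
# counters reported by jt.liveness_info() must be identical between
# consecutive steps. A minimal sketch of the idiom (run_one_step is a
# hypothetical stand-in for one training iteration):
prev = None
for step in range(10):
    run_one_step()
    info = jt.liveness_info()
    if step > 2:
        assert prev == info, f"memory leak {prev} {info}"
    prev = info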