Example #1
File: test_scope.py Project: zzmcdc/jittor
def test_var(self):
    jt.clean()
    # run the model twice: variables and their names must be reused, not duplicated
    for i in range(2):
        x = jt.array([[1]])
        y = model(x)
        params = jt.find_vars()
        assert len(params) == 3
        names = [p.name() for p in params]
        assert names == [
            "model/linear_0/var_0",
            "model/linear_1/var_0",
            "model/linear_2/var_0",
        ], str(names)
        # find_var only accepts a full variable name; a scope prefix raises an error
        jt.find_var("model/linear_0/var_0")
        expect_error(lambda: jt.find_var("model/linear_0"))
        expect_error(lambda: jt.find_var("model/linear"))
        # find_vars accepts a scope prefix and returns every var under it
        assert len(jt.find_vars("model/linear_0/var_0")) == 1
        assert len(jt.find_vars("model/linear_0/")) == 1
        assert len(jt.find_vars("model/")) == 3
    jt.clean()
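
The test relies on a model helper defined elsewhere in test_scope.py that is not shown in this excerpt. A minimal sketch of a model that would produce exactly the three names checked above, assuming jt.var_scope auto-numbers repeated scope names (linear_0, linear_1, ...) and jt.make_var names its variable var_0 (this is a hypothetical sketch, not the actual test code):

# Hypothetical sketch, not taken from test_scope.py.
def linear(x):
    with jt.var_scope('linear'):
        w = jt.make_var([1], init=jt.zeros)   # becomes ".../linear_i/var_0"
        return x * w

def model(x):
    # unique=True keeps the scope named "model" across repeated calls
    with jt.var_scope('model', unique=True):
        x = linear(x)
        x = linear(x)
        return linear(x)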
Example #2
def adam(model, loss, lr=3e-4, betas=(0.9, 0.999), eps=1e-8):
    # `model` is the scope-name string of the model; find_vars collects
    # every variable created under that scope
    ps = jt.find_vars(model)
    gs = jt.grad(loss, ps)
    with jt.var_scope('_'.join([model, 'adam']), unique=True):
        adam_step = jt.make_var([1], init=jt.zeros)
        adam_step += 1
        for p, g in zip(ps, gs):
            # first and second moment estimates, one pair per parameter
            m = jt.make_var(p.shape, init=jt.zeros)
            v = jt.make_var(p.shape, init=jt.zeros)

            m.assign(betas[0] * m + (1 - betas[0]) * g)
            v.assign(betas[1] * v + (1 - betas[1]) * g * g)
            # bias-corrected step size: lr * sqrt(1 - beta2^t) / (1 - beta1^t)
            step_size = lr * jt.sqrt(1 - betas[1] ** adam_step) / (1 - betas[0] ** adam_step)
            p -= m * step_size / (jt.sqrt(v) + eps)
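
A hypothetical call site for the adam() helper above. Here "model" is assumed to be the scope-name string under which the network's variables were created, and forward()/get_batches() are assumed stand-ins for the user's forward pass and data loader (none of these names come from the excerpt):

for x, y in get_batches():        # assumed data iterator
    pred = forward(x)             # assumed forward pass creating vars under the "model" scope
    dy = pred - y
    loss = (dy * dy).mean()       # mean squared error
    adam("model", loss, lr=1e-3)  # one optimizer step; Adam state lives under scope "model_adam"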
Example #3
def test_longest_dis_fuse(self):
    x = jt.array(np.random.rand(1, 3, 224, 224).astype(np.float32))
    loss = jt.sum(resnet_fake(x))
    ps = jt.find_vars('resnet_fake')
    gs = jt.grad(loss, ps)
    jt.sync(gs)
    # assert that no big tensor was actually allocated
    g = jt.dump_all_graphs()
    for s in g.nodes_info:
        if not s.startswith("Var"):
            continue
        # node info carries the shape between '[' and ']' and the
        # allocation pointer as the last comma-separated field inside '(...)'
        shape = s.split("[")[1].split("]")[0].split(",")
        ptr = s.split("(")[1].split(")")[0].split(",")[-1]
        if ptr != '0':
            # only vars whose buffer pointer is non-zero were allocated;
            # they must have at most 5 dimensions
            assert len(shape) <= 5, s
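
The string parsing above assumes each Var entry in dump_all_graphs().nodes_info carries its shape in brackets and its allocation pointer as the last field inside the parentheses. A small helper (hypothetical, same logic as the test) that pulls both out:

def parse_var_info(s):
    # shape: comma-separated dims between '[' and ']'
    shape = s.split("[")[1].split("]")[0].split(",")
    # pointer: last comma-separated field inside '(...)'; '0' means not allocated
    ptr = s.split("(")[1].split(")")[0].split(",")[-1]
    return shape, ptr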
Example #4
File: example.py Project: zzmcdc/jittor
batch_size = 50
base_lr = 0.05
# we need to stop the gradient of this global value to prevent a memory leak
lr = f32(base_lr).name("lr").stop_grad()


def get_data(n):
    for i in range(n):
        x = np.random.rand(batch_size, 1)
        y = x * x
        yield np.float32(x), np.float32(y)


for i, (x, y) in enumerate(get_data(n)):
    pred_y = model(x).name("pred_y")
    loss = ((pred_y - y)**f32(2)).name("loss")
    loss_mean = loss.mean()

    ps = jt.find_vars('model')
    gs = jt.grad(loss_mean, ps)
    # plain SGD update on every var under the "model" scope
    for p, g in zip(ps, gs):
        p -= g * lr
    if i > 2:
        assert prev == jt.liveness_info(), f"memory leak {prev} {jt.liveness_info()}"
    prev = jt.liveness_info()
    print(f"step {i}, loss = {loss_mean().sum()}")

# expected final loss
result = 0.0009948202641680837
assert abs(loss_mean.data - result) < 1e-6
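
The excerpt uses a few names defined earlier in example.py that are not shown here, notably f32, model, and n. A rough sketch of what they might look like, assuming the same var_scope/make_var style as the other examples (all names, shapes, and initializers below are guesses, not taken from the file):

import numpy as np
import jittor as jt

def f32(x):
    # assumed float32 conversion helper
    return jt.array(np.float32(x))

n = 1000  # assumed number of training batches

def linear(x, n_out):
    with jt.var_scope('linear'):
        w = jt.make_var([x.shape[-1], n_out], init=jt.random)
        return jt.matmul(x, w)

def model(x):
    # vars created here are picked up by jt.find_vars('model') in the loop above
    with jt.var_scope('model', unique=True):
        x = linear(x, 10)
        x = jt.maximum(x, f32(0.0))  # ReLU
        return linear(x, 1)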