Example #1
def run(use_trace, symbolic):
    a = tensor(np.array([1926.0817], dtype=np.float32))
    net = Sigmoid()
    func_run = run_saved_context
    if use_trace:
        func_run = trace(run_saved_context, symbolic=symbolic)
    s = func_run(a, net=net)
    s2 = F.sigmoid(a)
    assertTensorClose(s.numpy(), s2.numpy())
    assertTensorClose(
        F.grad(s, a, use_virtual_grad=False).numpy(),
        F.grad(s2, a, use_virtual_grad=False).numpy(),
    )
Example #2
def test_sgd_simple():
    data, data_shape, label, label_shape = get_input()
    mlp = MLP()
    opt = SGD(mlp.parameters(), lr=0.01, weight_decay=0.1)
    for idx in range(3):
        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        pred = mlp(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        if idx % 2:
            opt.zero_grad()
        else:
            mlp.zero_grad()
        opt.backward(loss)
        grads = TensorDict()
        orig_params = TensorDict()
        for param in mlp.parameters():
            grad = F.grad(loss, param, use_virtual_grad=False)
            assertTensorClose(grad.numpy(), param.grad.numpy())
            grads[param] = np.copy(grad.numpy())
            orig_params[param] = np.copy(param.numpy())
        opt.step()
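        # expected SGD update with weight decay: param * (1 - lr * wd) - lr * grad = param * 0.999 - grad * 0.01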
        for param in mlp.parameters():
            assertTensorClose(param.numpy(),
                              orig_params[param] * 0.999 - grads[param] * 0.01)
Example #3
def test_update_lr():
    data, data_shape, label, label_shape = get_input()
    mlp = MLP()
    opt = SGD(mlp.parameters(), lr=0.01)
    pred = mlp(data)
    loss = F.square_loss(pred, label.reshape(-1, 1))
    opt.zero_grad()
    opt.backward(loss)
    opt.step()
    for group in opt.param_groups:
        group["lr"] += 0.02
    for _ in range(3):
        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        pred = mlp(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        opt.zero_grad()
        opt.backward(loss)
        for param in mlp.parameters():
            grad = F.grad(loss, param, use_virtual_grad=False)
            assertTensorClose(grad.numpy(), param.grad.numpy())
        orig_params = []
        for param in mlp.parameters():
            orig_params.append(np.copy(param.numpy()))
        opt.step()
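        # lr is now 0.01 + 0.02 = 0.03, so each step applies param -= 0.03 * grad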
        for param, orig_param in zip(mlp.parameters(), orig_params):
            assertTensorClose(param.numpy(),
                              orig_param - param.grad.numpy() * 0.03)
Example #4
def test_Clone_model():
    # The new parameters must be cloned into another model before training can continue
    train_loader = build_dataloader()
    image_support = meg.tensor(dtype='float32')
    label_support = meg.tensor(dtype="int32")

    model = OmniglotFC(28 * 28, 5)

    model.train()
    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(model.parameters(), lr=0.05)
    iters = iter(train_loader)

    (images_support, labels_support, images_query, labels_query) = next(iters)
    i = 0
    image_support.set_value(images_support[i])
    label_support.set_value(labels_support[i])
    image_support = F.remove_axis(image_support, 1)
    label_support = F.remove_axis(label_support, 1)

    support_out = model.forward(image_support)
    support_loss = loss_fn(support_out, label_support)

    # Update the parameters that require gradients
    params = list(model.parameters(requires_grad=True))
    params[0] = meg.tensor(np.ones((5)), dtype='float32')

    grads = F.grad(support_loss, params, use_virtual_grad=False)

    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]
Example #5
def train_func3(x1, y1, x2, y2, *, loss_fn, opt, net, params):
    loss = loss_fn(net(x1, weights=params), y1)
    grads = F.grad(loss, params, use_virtual_grad=False)
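    # one inner-loop SGD step (step size 0.5) on the support-set loss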
    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]
    # forward twice
    loss2 = loss_fn(net(x2, weights=fast_weights), y2)
    opt.backward(loss2)
Example #6
def test_grad_twice():
    # model define
    model = M.Sequential(M.Linear(10, 20), M.Linear(20, 10), M.Linear(10, 5))
    model.train()
    named_param = dict(list(model.named_parameters(requires_grad=True)))
    named_module = dict(list(model.named_children()))
    name_keys = list(named_param.keys())
    params = list(named_param.values())
    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(params, lr=0.003)

    # forward once
    optimizer.zero_grad()
    x1 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y1 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    loss = loss_fn(model(x1), y1)
    grads = F.grad(loss,
                   params,
                   use_virtual_grad=False,
                   return_zero_for_nodep=False)
    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]

    # manual update params
    replace_parameter(named_module, dict(zip(name_keys, fast_weights)))

    # forward twice
    x2 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y2 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    loss2 = loss_fn(model(x2), y2)
    # got error
    replace_parameter(named_module, named_param)
    optimizer.backward(loss2)
    optimizer.step()
Example #7
def test_maml_update_var():
    model = OmniglotFC(28 * 28, 5)
    model.train()
    loss_fn = F.cross_entropy_with_softmax
    old_params = list(model.parameters())
    maml = MAML(model)
    params = list(maml.named_parameters.values())
    optimizer = optim.SGD(old_params, lr=0.05)
    optimizer.zero_grad()
    support_out = model.forward(
        meg.tensor(np.random.randn(5, 28 * 28), dtype='float32'))
    support_loss = loss_fn(
        support_out, meg.tensor(np.random.randint(0, 5, (5)), dtype='int32'))
    optimizer.backward(support_loss)
    optimizer.step()
    assert id(old_params[0]) == id(params[0])
    # manual update

    grads = F.grad(support_loss, params, use_virtual_grad=False)
    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]
    named_update = dict(zip(maml.named_parameters.keys(), fast_weights))
    named_old = dict(zip(maml.named_parameters.keys(), old_params))
    maml.replace_parameter(maml.module_table, named_update)
    # Once replaced with the new values, the parameters can no longer be found via model.parameters().
    after_params = list(model.parameters())
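    # the lines below only inspect the replaced parameters; they have no side effects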
    maml.module_table['classifier'].bias
    named_update['classifier.bias']
    mods = list(model.modules())
    mods[1].bias

    maml.replace_parameter(maml.module_table, named_old)
Example #8
def main():
    nway = 5
    batch_size = 32
    train_loader, val_loader = build_dataset(nway=nway, batch_size=batch_size)
    model = OmniglotFC(28 * 28, nway)
    model.train()
    maml = MAML(model)

    loss_fn = F.cross_entropy_with_softmax
    opt = optim.Adam(maml.trainable_params, lr=0.003)
    accuracy = F.accuracy
    adapt_data = meg.tensor(dtype='float32')
    adapt_label = meg.tensor(dtype='int32')
    eval_data = meg.tensor(dtype='float32')
    eval_label = meg.tensor(dtype='int32')
    iteration = 0
    for ep in range(500):
        for (images_support, labels_support, images_query,
             labels_query) in train_loader:
            opt.zero_grad()
            meta_train_error = 0.0
            meta_train_accuracy = 0.0
            for i in range(batch_size):
                (image_support, label_support, image_query,
                 label_query) = (images_support[i], labels_support[i],
                                 images_query[i], labels_query[i])
                adapt_data.set_value(np.squeeze(image_support, 1))
                adapt_label.set_value(np.squeeze(label_support, 1))

                loss = loss_fn(model.forward(adapt_data), adapt_label)
                gradients = F.grad(loss,
                                   maml.trainable_params,
                                   use_virtual_grad=False,
                                   return_zero_for_nodep=False)

                fast_weights = [
                    p - 0.5 * g
                    for p, g in zip(maml.trainable_params, gradients)
                ]

                maml.replace_fast_parameter(fast_weights)
                # Evaluate the adapted model
                eval_data.set_value(np.squeeze(image_query, 1))
                eval_label.set_value(np.squeeze(label_query, 1))

                predictions = model.forward(eval_data)
                valid_error = loss_fn(predictions, eval_label)
                valid_accuracy = accuracy(predictions, eval_label)
                opt.backward(valid_error)
                meta_train_error += valid_error.numpy().item()
                meta_train_accuracy += valid_accuracy.numpy().item()

            # for p in maml.trainable_params:
            #   p.grad = p.grad * (1.0 / batch_size)
            opt.step()
            print('Iteration', iteration)
            print('Meta Train Error', meta_train_error / batch_size)
            print('Meta Train Accuracy', meta_train_accuracy / batch_size)
            iteration += 1
Example #9
def test_none_in_out_grad():
    class Test(Function):
        def forward(self, a, b):
            return a, b

        def backward(self, grad_a, grad_b):
            assert grad_b is None
            return (grad_a, 0)

    a = tensor(np.array([1.0], dtype=np.float32))
    b = tensor(np.array([2.0], dtype=np.float32))
    aa, bb = Test()(a, b)
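    # only aa is differentiated, so backward receives grad_b=None and returns (grad_a, 0):
    # d(aa)/da == 1 and d(aa)/db == 0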
    assertTensorClose(
        F.grad(aa, a, use_virtual_grad=False).numpy(),
        np.array([1.0], dtype=np.float32))
    assertTensorClose(
        F.grad(aa, b, use_virtual_grad=False).numpy(),
        np.array([0.0], dtype=np.float32))
Example #10
def test_a_plus_b():
    data_shape = (1, 9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)
    bv = np.random.random(data_shape).astype(np.float32)
    a = tensor(av)
    b = tensor(bv)

    class MulFunc(Function):
        def forward(self, a, b):
            return a * b

        def backward(self, grad_o):
            return (grad_o * b * 2, grad_o * a * 3)

    c = MulFunc()(a, b).sum()
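    # backward deliberately scales the analytic gradients (dc/da = b, dc/db = a) by 2 and 3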
    assertTensorClose(c.numpy(), (av * bv).sum())
    assertTensorClose(F.grad(c, a, use_virtual_grad=False).numpy(), bv * 2)
    assertTensorClose(F.grad(c, b, use_virtual_grad=False).numpy(), av * 3)
Example #11
def test_save_context():
    class Sigmoid(Function):
        def forward(self, x):
            y = 1 / (1 + F.exp(-x))
            self.save_for_backward(y)
            return y

        def backward(self, grad_y):
            (y, ) = self.saved_tensors
            return grad_y * y * (1 - y)

    a = tensor(np.array([1926.0817], dtype=np.float32))
    s = Sigmoid()(a)
    s2 = F.sigmoid(a)
    assertTensorClose(s.numpy(), s2.numpy())
    assertTensorClose(
        F.grad(s, a, use_virtual_grad=False).numpy(),
        F.grad(s2, a, use_virtual_grad=False).numpy(),
    )
Example #12
def train_func(x1, y1, x2, y2, *, loss_fn, opt, net, keys, params):
    # Here data and label no longer need a pre-created tensor filled via set_value; these steps happen inside trace
    logits = net(x1, weights=dict(zip(keys, params)))
    loss = loss_fn(logits, y1)
    grads = F.grad(loss, params, use_virtual_grad=False)
    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]

    # forward twice
    loss2 = loss_fn(net(x2, weights=dict(zip(keys, fast_weights))), y2)
    opt.backward(loss2)
Example #13
def test_mge_81():
    np.random.seed(0)
    N, D = 3, 4
    x = mge.Parameter(value=np.random.normal(size=(N, D)).astype(np.float32))
    y = mge.Parameter(value=np.random.normal(size=(N, D)).astype(np.float32))
    z = mge.Parameter(value=np.random.normal(size=(N, D)).astype(np.float32))
    a = x * y
    b = a + z
    c = F.sum(b)
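    # for c = sum(x * y + z): dc/dx = y, dc/dy = x, dc/dz = ones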
    grad_x = F.grad(c, x, use_virtual_grad=False)
    grad_y = F.grad(c, y, use_virtual_grad=False)
    grad_z = F.grad(c, z, use_virtual_grad=False)
    print(grad_x.numpy())
    print(grad_y.numpy())
    print(grad_z.numpy())
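    # the BatchNorm2d calls below only check that forward runs with and without affine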
    m = M.BatchNorm2d(4)
    input = tensor(np.zeros((64, 4, 32, 32), dtype=np.float32))
    _ = m(input)
    m = M.BatchNorm2d(4, affine=False)
    _ = m(input)
Example #14
def test_skip_invalid_grad():
    data_shape = (1, 9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)
    bv = np.random.random(data_shape).astype(np.float32)
    a = tensor(av)
    b = tensor(bv)
    cookie = tensor(np.random.random(data_shape).astype(np.float32))

    class EqWithFakeGrad(Function):
        def forward(self, a, b):
            return a == b

        def backward(self, grad_o):
            _ = grad_o
            return cookie, cookie

    c = EqWithFakeGrad()(a, b).sum()
    assertTensorClose(c.numpy(), (av == bv).sum().astype(np.float32))
    assertTensorClose(F.grad(c, a, use_virtual_grad=False).numpy(), cookie.numpy())
    assertTensorClose(F.grad(c, b, use_virtual_grad=False).numpy(), cookie.numpy())
Example #15
def test_zero_grad():
    class StopGradient(Function):
        def forward(self, a):
            return a

        def backward(self, *_):
            return None

    a = tensor(np.array([1.0], dtype=np.float32))
    b = a * 3.0
    c = a * 4.0
    loss = StopGradient()(b) + c
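    # StopGradient blocks the gradient through b = a * 3.0, so only c = a * 4.0 contributes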
    assertTensorClose(
        F.grad(loss, a, use_virtual_grad=False).numpy(),
        np.array([4.0], dtype=np.float32),
    )
Example #16
def test_ste():
    class STE(Function):
        def forward(self, x):
            maxv, minv = x.max(), x.min()
            scale = F.maximum(maxv, -minv) / 127
            return F.round(x / scale) * scale

        def backward(self, grad_y):
            return grad_y

    data_shape = (1, 9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)
    a = tensor(av)
    q = STE()(a)
    q_2 = (q * 2.0).sum()
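    # the straight-through estimator passes grad_y through unchanged, so d(q_2)/da == 2 everywhere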
    assertTensorClose(
        F.grad(q_2, a, use_virtual_grad=False).numpy(),
        np.broadcast_to(np.array([2.0], dtype=np.float32), data_shape),
    )