Example #1
def test_maml_update_var():
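    # check that the MAML wrapper shares parameter objects with the model, then compute
    # fast weights by hand and swap them in/out via replace_parameter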
    model = OmniglotFC(28 * 28, 5)
    model.train()
    loss_fn = F.cross_entropy_with_softmax
    old_params = list(model.parameters())
    maml = MAML(model)
    params = list(maml.named_parameters.values())
    optimizer = optim.SGD(old_params, lr=0.05)
    optimizer.zero_grad()
    support_out = model.forward(
        meg.tensor(np.random.randn(5, 28 * 28), dtype='float32'))
    support_loss = loss_fn(
        support_out, meg.tensor(np.random.randint(0, 5, (5)), dtype='int32'))
    optimizer.backward(support_loss)
    optimizer.step()
    assert id(old_params[0]) == id(params[0])
    # manually update the parameters

    grads = F.grad(support_loss, params, use_virtual_grad=False)
    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]
    named_update = dict(zip(maml.named_parameters.keys(), fast_weights))
    named_old = dict(zip(maml.named_parameters.keys(), old_params))
    maml.replace_parameter(maml.module_table, named_update)
    # once replaced with the new values, the original parameters can no longer be found via model.parameters()
    after_params = list(model.parameters())
    maml.module_table['classifier'].bias
    named_update['classifier.bias']
    mods = list(model.modules())
    mods[1].bias

    maml.replace_parameter(maml.module_table, named_old)
Example #2
def run_syncbn(trace_mode):
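    # train a small Conv2d/SyncBatchNorm stack with gradients all-reduced ("MEAN") across
    # workers, optionally wrapping the training step in trace()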
    x = F.ones([2, 16, 4, 4], dtype="float32")

    net = Sequential(
        Conv2d(16, 16, 1), SyncBatchNorm(16), Conv2d(16, 16, 1), SyncBatchNorm(16),
    )

    gm = ad.GradManager().attach(
        net.parameters(), callbacks=dist.make_allreduce_cb("MEAN")
    )
    opt = optimizer.SGD(net.parameters(), 1e-3)

    def train_func(x):
        with gm:
            y = net(x)
            loss = y.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    if trace_mode is not None:
        train_func = trace(train_func, symbolic=trace_mode)

    for _ in range(3):
        loss = train_func(x)
        loss.numpy()
Example #3
def test_output_copy_trace():
    class Simple(Module):
        def __init__(self):
            super().__init__()
            self.a = Parameter([1.0], dtype=np.float32)

        def forward(self, x):
            x = x * self.a
            # will result in a copy of the output in grad
            x = F.exp(x)
            return x

    ys = {False: [], True: []}

    for symbolic in [False, True]:
        net = Simple()
        gm = GradManager().attach(net.parameters())
        opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
        data = tensor(np.arange(4).reshape(2, 2), dtype="float32")

        @trace(symbolic=symbolic)
        def train_func(d):
            with gm:
                loss = net(d)
                gm.backward(loss)
                opt.step().clear_grad()
            return loss

        for i in range(3):
            y = train_func(data).numpy()
            ys[symbolic].append(y)

    for i in range(3):
        np.testing.assert_equal(ys[False][i], ys[True][i])
Example #4
def test_none_in_out_grad():
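    # the second output is not used in the loss, so backward receives grad_b=None;
    # returning None for b means net.b keeps its value after the SGD step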
    class Test(Function):
        def forward(self, a, b):
            return a, b

        def backward(self, grad_a, grad_b):
            assert grad_b is None
            return (grad_a, None)

    class Simple(Module):
        def __init__(self, a, b):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.b = Parameter(b, dtype=np.float32)
            self.layer = Test()

        def forward(self):
            aa, bb = self.layer(self.a, self.b)
            return aa, bb

    a = tensor(np.array([1.0], dtype=np.float32))
    b = tensor(np.array([2.0], dtype=np.float32))
    net = Simple(a, b)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())
    optim.clear_grad()
    with gm:
        loss, _ = net()
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([1.0 - 1.0], dtype=np.float32))
    np.testing.assert_almost_equal(net.b.numpy(),
                                   np.array([2.0 - 0.0], dtype=np.float32))
Example #5
def test_single_input():
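    # MulFunc scales its input by 10 and its backward scales the incoming gradient by 10,
    # so SGD with lr=1.0 subtracts 10 from every element of a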
    data_shape = (9, 2, 6)
    av = np.random.random(data_shape).astype(np.float32)

    class MulFunc(Function):
        def forward(self, a):
            self.a = a
            return a * 10

        def backward(self, grad_o):
            return grad_o * 10

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer1 = MulFunc()

        def forward(self):
            x = self.layer1(self.a)
            return x

    net = Simple(av)
    gm = ad.GradManager().attach(net.parameters())
    opt = optimizer.SGD(net.parameters(), lr=1.0)

    opt.clear_grad()
    with gm:
        loss = net()
        gm.backward(loss.sum())
    opt.step()

    np.testing.assert_almost_equal(loss.numpy(), (av * 10))
    np.testing.assert_almost_equal(net.a.numpy(), (av - 10))
Example #6
    def run(step, enable_trace, use_symbolic):
        def train_func(data, net=None, opt=None):
            pred = net(data)
            opt.backward(pred)
            return pred

        if enable_trace:
            train_func = trace(train_func, symbolic=use_symbolic)

        net = Mixed()
        data = tensor()
        opt = optimizer.SGD(net.parameters(), lr=lr)

        saved_param = init_param
        for i in range(step):
            opt.zero_grad()
            data.set_value([i + 1.0])
            output = train_func(data, net=net, opt=opt)
            opt.step()

            expect_param = (
                saved_param[0] - lr * saved_param[1] * data.numpy(),
                saved_param[1] - lr * saved_param[0] * data.numpy(),
            )
            assertTensorClose(
                output.numpy(), saved_param[0] * saved_param[1] * data.numpy()
            )
            torch_param = net.torch_module._torch_params[0].detach().cpu()
            assertTensorClose(torch_param.numpy(), expect_param[0])
            assertTensorClose(net.multiplier.numpy(), expect_param[1])
            saved_param = expect_param
Example #7
def test_Clone_model():
    # the new parameters must be cloned into another model before training can continue
    train_loader = build_dataloader()
    image_support = meg.tensor(dtype='float32')
    label_support = meg.tensor(dtype="int32")

    model = OmniglotFC(28 * 28, 5)

    model.train()
    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(model.parameters(), lr=0.05)
    iters = iter(train_loader)

    (images_support, labels_support, images_query, labels_query) = next(iters)
    i = 0
    image_support.set_value(images_support[i])
    label_support.set_value(labels_support[i])
    image_support = F.remove_axis(image_support, 1)
    label_support = F.remove_axis(label_support, 1)

    support_out = model.forward(image_support)
    support_loss = loss_fn(support_out, label_support)

    # update the parameters that require gradient updates
    params = list(model.parameters(requires_grad=True))
    params[0] = meg.tensor(np.ones((5)), dtype='float32')

    grads = F.grad(support_loss, params, use_virtual_grad=False)

    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]
Example #8
def test_sgd_momentum():
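    # the momentum buffer should stay unchanged during inference and accumulate as
    # 0.9 * previous + grad during training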
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    # do one training step
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

    # do one inference step
    loss = net(data)
    np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)

    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

    # do one training step
    optim.clear_grad()
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()

    np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
    np.testing.assert_almost_equal(
        optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34)
Example #9
def test_grad_twice_method_3():
    # define the model
    model = CustomModel3()
    model.train()
    named_param = dict(list(model.named_parameters(requires_grad=True)))
    params = list(named_param.values())
    external_params = [
        meg.Parameter(np.random.normal(size=p.shape), dtype='float32')
        for p in params
    ]

    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(external_params, lr=0.003)

    # forward once
    optimizer.zero_grad()
    x1 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y1 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    x2 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y2 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')

    train_func3(x1,
                y1,
                x2,
                y2,
                loss_fn=loss_fn,
                opt=optimizer,
                net=model,
                params=external_params)
    optimizer.step()
Example #10
def test_grad_twice_method_2():
    # define the model
    model = CustomModel()
    model.train()
    named_param = dict(list(model.named_parameters(requires_grad=True)))
    name_keys = list(named_param.keys())
    params = list(named_param.values())

    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(params, lr=0.003)

    # forward once
    optimizer.zero_grad()
    x1 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y1 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    x2 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y2 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')

    train_func(x1,
               y1,
               x2,
               y2,
               loss_fn=loss_fn,
               opt=optimizer,
               net=model,
               keys=name_keys,
               params=params)
    optimizer.step()
Example #11
def test_grad_twice():
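    # compute fast weights from the first loss, swap them into the modules for a second
    # forward pass, then restore the original parameters before the optimizer update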
    # define the model
    model = M.Sequential(M.Linear(10, 20), M.Linear(20, 10), M.Linear(10, 5))
    model.train()
    named_param = dict(list(model.named_parameters(requires_grad=True)))
    named_module = dict(list(model.named_children()))
    name_keys = list(named_param.keys())
    params = list(named_param.values())
    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(params, lr=0.003)

    # forward once
    optimizer.zero_grad()
    x1 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y1 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    loss = loss_fn(model(x1), y1)
    grads = F.grad(loss,
                   params,
                   use_virtual_grad=False,
                   return_zero_for_nodep=False)
    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]

    # manually update the parameters
    replace_parameter(named_module, dict(zip(name_keys, fast_weights)))

    # forward twice
    x2 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y2 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    loss2 = loss_fn(model(x2), y2)
    # got error
    replace_parameter(named_module, named_param)
    optimizer.backward(loss2)
    optimizer.step()
Example #12
def test_clear_grad():
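    # StopGradient blocks the gradient through the b-branch, so only c = a * 4.0
    # contributes and a is updated to 1.0 - 4.0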
    class StopGradient(Function):
        def forward(self, a):
            return a

        def backward(self, *_):
            return None

    class Simple(Module):
        def __init__(self, a):
            super().__init__()
            self.a = Parameter(a, dtype=np.float32)
            self.layer = StopGradient()

        def forward(self):
            b = self.a * 3.0
            c = self.a * 4.0
            return self.layer(b) + c

    a = tensor(np.array([1.0], dtype=np.float32))
    net = Simple(a)
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    gm = ad.GradManager().attach(net.parameters())
    optim.clear_grad()

    with gm:
        loss = net()
        gm.backward(loss.sum())
    optim.step()
    np.testing.assert_almost_equal(
        net.a.numpy(),
        np.array([1.0 - 4.0], dtype=np.float32),
    )
Example #13
    def worker():
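        # simple pipeline: each rank receives activations from the previous rank, applies
        # its Linear layer and sends the result on; only the last rank computes the loss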
        rank = dist.get_rank()
        size = dist.get_world_size()
        x = mge.tensor(np.random.randn(1, rank * 2 + 2), dtype=np.float32)
        m = M.Linear(rank * 2 + 2, rank * 2 + 4)
        gm = GradManager().attach(m.parameters())
        opt = optim.SGD(m.parameters(), 1e-3, momentum=0.9)

        def train_func(x):
            with gm:
                if rank != 0:
                    x = dist.functional.remote_recv(rank - 1,
                                                    shape=(1, rank * 2 + 2),
                                                    dtype=np.float32)
                y = m(x)
                if rank != size - 1:
                    dist.functional.remote_send(y, dest_rank=rank + 1)
                    gm.backward()
                else:
                    y = y.mean()
                    gm.backward(y)
                opt.step().clear_grad()

        train_funcs = [
            train_func,
            trace(symbolic=False)(train_func),
            trace(symbolic=True)(train_func),
        ]

        for func in train_funcs:
            for i in range(3):
                func(x)
Example #14
def worker(rank, world_size, args):
    if world_size > 1:
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )
        logger.info("Init process group for gpu%d done", rank)

    sys.path.insert(0, os.path.dirname(args.file))
    current_network = importlib.import_module(
        os.path.basename(args.file).split(".")[0])

    model = current_network.Net(current_network.Cfg(),
                                batch_size=args.batch_size)
    params = model.parameters(requires_grad=True)
    model.train()

    if rank == 0:
        logger.info(get_config_info(model.cfg))
    opt = optim.SGD(
        params,
        lr=model.cfg.basic_lr * world_size * model.batch_size,
        momentum=model.cfg.momentum,
        weight_decay=model.cfg.weight_decay,
    )

    if args.weight_file is not None:
        weights = mge.load(args.weight_file)
        model.backbone.bottom_up.load_state_dict(weights)

    logger.info("Prepare dataset")
    loader = build_dataloader(model.batch_size, args.dataset_dir, model.cfg)
    train_loader = iter(loader["train"])

    for epoch_id in range(model.cfg.max_epoch):
        for param_group in opt.param_groups:
            param_group["lr"] = (model.cfg.basic_lr * world_size *
                                 model.batch_size *
                                 (model.cfg.lr_decay_rate**bisect.bisect_right(
                                     model.cfg.lr_decay_sates, epoch_id)))

        tot_steps = model.cfg.nr_images_epoch // (model.batch_size *
                                                  world_size)
        train_one_epoch(model, train_loader, opt, tot_steps, rank, epoch_id,
                        world_size)
        if rank == 0:
            save_path = "log-of-{}/epoch_{}.pkl".format(
                os.path.basename(args.file).split(".")[0], epoch_id)
            mge.save(
                {
                    "epoch": epoch_id,
                    "state_dict": model.state_dict()
                },
                save_path,
            )
            logger.info("dump weights to %s", save_path)
Example #15
def test_save_load():
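    # save the module and optimizer state, reload them on cpu0 and make sure another
    # training step still runs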
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])

    with gm:
        loss = net(data)
        gm.backward(loss)

    optim.step()

    model_name = "simple.pkl"
    print("save to {}".format(model_name))

    mge.save(
        {
            "name": "simple",
            "state_dict": net.state_dict(),
            "opt_state": optim.state_dict(),
        },
        model_name,
    )

    # Load param to cpu
    checkpoint = mge.load(model_name, map_location="cpu0")
    device_save = mge.get_default_device()
    mge.set_default_device("cpu0")
    net = Simple()
    net.load_state_dict(checkpoint["state_dict"])
    optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
    optim.load_state_dict(checkpoint["opt_state"])
    print("load done")
    os.remove("simple.pkl")

    with gm:
        loss = net([1.23])
        gm.backward(loss)

    optim.step()
    # Restore device
    mge.set_default_device(device_save)
Example #16
def worker(rank, gpu_num, args):
    # enable sublinear memory optimization
    os.environ[
        "MGB_COMP_GRAPH_OPT"] = "enable_sublinear_memory_opt=1;seq_opt.enable_seq_comp_node_opt=0"
    os.environ["MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER"] = '50'
    if gpu_num > 1:
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=gpu_num,
            rank=rank,
            dev=rank,
        )
        logger.info("Init process group for gpu%d done", rank)

    model = network.Network()
    params = model.parameters(requires_grad=True)
    model.train()

    opt = optim.SGD(
        params,
        lr=cfg.basic_lr * gpu_num * cfg.batch_per_gpu,
        momentum=cfg.momentum,
        weight_decay=cfg.weight_decay,
    )
    if cfg.pretrain_weight is not None:
        weights = mge.load(cfg.pretrain_weight)
        del weights['fc.weight']
        del weights['fc.bias']
        model.resnet50.load_state_dict(weights)

    logger.info("Prepare dataset")
    train_loader = dataset.train_dataset(rank)
    logger.info("Training...")
    for epoch_id in range(cfg.max_epoch):
        for param_group in opt.param_groups:
            param_group["lr"] = (cfg.basic_lr * gpu_num * cfg.batch_per_gpu *
                                 (cfg.lr_decay_rate**bisect.bisect_right(
                                     cfg.lr_decay_sates, epoch_id)))

        max_steps = cfg.nr_images_epoch // (cfg.batch_per_gpu * gpu_num)
        train_one_epoch(model, train_loader, opt, max_steps, rank, epoch_id,
                        gpu_num)
        if rank == 0:
            save_path = os.path.join(cfg.model_dir,
                                     'epoch_{}.pkl'.format(epoch_id + 1))
            mge.save(
                {
                    "epoch": epoch_id,
                    "state_dict": model.state_dict()
                },
                save_path,
            )
            logger.info("dump weights to %s", save_path)
Example #17
def test_bn_no_track_stat():
    nchannel = 3
    m = BatchNorm2d(nchannel, track_running_stats=False)

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")
    with gm:
        loss = m(data).sum()
        gm.backward(loss)
    optim.step()
Example #18
def test_hello_world():
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    data = tensor([2.34])
    with gm:
        loss = net(data)
        gm.backward(loss)
    optim.step()
    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([1.23 - 2.34]).astype(np.float32))
Example #19
def test_optimizer_serialization():
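    # track the expected momentum buffer by hand and check that an optimizer reloaded from
    # state_dict (lr and momentum included) reproduces the same update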
    data, data_shape, label, label_shape = get_input()
    mlp = MLP()
    opt = optimizer.SGD(mlp.parameters(), lr=0.01, momentum=0.9)
    slots = TensorDict()
    for param in mlp.parameters():
        slots[param] = np.zeros(param.shape).astype(np.float32)

    pred = mlp(data)
    loss = F.square_loss(pred, label.reshape(-1, 1))
    opt.zero_grad()
    opt.backward(loss)
    opt.step()
    for param in mlp.parameters():
        slots[param] = slots[param] * 0.9 + param.grad.numpy()

    with BytesIO() as fout:
        save(opt.state_dict(), fout)
        fout.seek(0)
        state_dict = load(fout)
        opt1 = optimizer.SGD(mlp.parameters(), lr=0.02, momentum=0.8)
        opt1.load_state_dict(state_dict)

        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        pred = mlp(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        opt1.zero_grad()
        opt1.backward(loss)
        orig_params = TensorDict()
        for param in mlp.parameters():
            orig_params[param] = np.copy(param.numpy())
        opt1.step()
        for param in mlp.parameters():
            orig_param = orig_params[param]
            slots[param] = slots[param] * 0.9 + param.grad.numpy()
            assertTensorClose(param.numpy(), orig_param - 0.01 * slots[param])
Example #20
def train_pipeline():
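    # ResNet18Pipeline (not shown here) runs the forward pass in m(x); loss computation and
    # backward happen inside m.backward(label, gm)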
    m = ResNet18Pipeline()
    x = F.ones([32, 3, 224, 224])
    label = F.zeros([32], dtype="int32")

    gm = ad.GradManager().attach(m.parameters())
    opt = optim.SGD(m.parameters(), 1e-3, 0.9, 1e-4)

    for _ in range(2):
        m(x)
        loss = m.backward(label, gm)
        opt.step().clear_grad()
        print(loss)
Example #21
def test_clip_grad_norm():
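    # after clipping, every gradient should equal the saved gradient scaled by
    # max_norm / original_norm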
    net = Net()
    x = mge.tensor(np.random.randn(10, 3, 224, 224))
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
    with gm:
        loss = net(x).sum()
        gm.backward(loss)
    save_grad_value(net)
    max_norm = 1.0
    original_norm = optim.clip_grad_norm(net.parameters(), max_norm=max_norm, ord=2)
    scale = max_norm / original_norm
    for param in net.parameters():
        np.testing.assert_almost_equal(param.grad.numpy(), param.grad_backup * scale)
    opt.step().clear_grad()
Example #22
def test_detach():
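    # Simple (not shown here) detaches the a-branch, so a keeps its value while b is
    # updated by the gradient summed over the 10x10 input (100)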
    net = Simple()

    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()
    gm = ad.GradManager().attach(net.parameters())

    dshape = (10, 10)
    data = tensor(np.ones(dshape).astype(np.float32))
    with gm:
        loss = net(data).sum()
        gm.backward(loss)
    optim.step()
    np.testing.assert_equal(net.a.numpy(), np.array([1.0]).astype(np.float32))
    np.testing.assert_equal(net.b.numpy(),
                            np.array([1.0 - 10.0 * 10.0]).astype(np.float32))
Example #23
def test_sgd_momentum_trace():
    origin_inplace = os.getenv("MEGENGINE_INPLACE_UPDATE")
    symbolic = (True, False)
    inplace = (0, 1)
    for symbolic, inplace in itertools.product(symbolic, inplace):
        os.environ["MEGENGINE_INPLACE_UPDATE"] = str(inplace)

        @trace(symbolic=symbolic)
        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        @trace(symbolic=symbolic)
        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do 3 inference steps
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34),
                                           5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do one training step
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34,
            5)
    if origin_inplace:
        os.environ["MEGENGINE_INPLACE_UPDATE"] = origin_inplace
    else:
        del os.environ["MEGENGINE_INPLACE_UPDATE"]
Example #24
def test_advance_indexing_with_subtensor():
    net = Simple2()

    gm = ad.GradManager().attach(net.parameters())
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()

    dshape = (2, 3, 4, 3, 4, 2)
    raw_data = np.arange(576).reshape(dshape).astype(np.float32)
    data = tensor(raw_data)
    answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum()
    with gm:
        loss = net(data).sum()
        gm.backward(loss)
    optim.step()
    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([answer]).astype(np.float32))
Example #25
def run_frozen_bn(BNModule, is_training, use_trace, use_symbolic):
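    # with freeze=True the BN affine parameters must not change; in eval mode the running
    # statistics stay fixed as well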
    nchannel = 3
    m = BNModule(nchannel, freeze=True)
    if is_training:
        m.train()
    else:
        m.eval()
    var = 4.0
    bias = 1.0
    shape = (1, nchannel, 1, 1)
    m.running_var[...] = var * F.ones(shape)
    m.running_mean[...] = bias * F.ones(shape)

    saved_var = m.running_var.numpy()
    saved_mean = m.running_mean.numpy()
    saved_wt = m.weight.numpy()
    saved_bias = m.bias.numpy()

    gm = ad.GradManager().attach(m.parameters())
    optim = optimizer.SGD(m.parameters(), lr=1.0)
    optim.clear_grad()

    data = np.random.random((6, nchannel, 2, 2)).astype("float32")

    def train_fn(d):
        for _ in range(3):
            with gm:
                loss = m(d).mean()
                gm.backward(loss)
            optim.step()
        return loss

    if use_trace:
        train_fn = trace(train_fn, symbolic=use_symbolic)

    for _ in range(3):
        loss = train_fn(megengine.tensor(data))
        if not is_training:
            np.testing.assert_equal(m.running_var.numpy(), saved_var)
            np.testing.assert_equal(m.running_mean.numpy(), saved_mean)
            np.testing.assert_almost_equal(
                loss.numpy(), ((data - bias) / np.sqrt(var)).mean(), 5
            )
        np.testing.assert_equal(m.weight.numpy(), saved_wt)
        np.testing.assert_equal(m.bias.numpy(), saved_bias)
Example #26
def test_load_state_dict_no_cache(monkeypatch):
    with monkeypatch.context() as mk:
        mk.setenv("MEGENGINE_INPLACE_UPDATE", "1")
        net = Net()

        optim = optimizer.SGD(net.parameters(), lr=0.1)
        gm = ad.GradManager().attach(net.parameters())
        state = {
            "fc.weight": np.array([[0]], dtype=np.float32),
            "fc.bias": np.array([0.0], dtype=np.float32),
        }
        net.load_state_dict(state)
        images = mge.tensor([[0]], dtype=np.float32)
        with gm:
            loss = net(images)
            gm.backward(loss)
            optim.step()
            optim.clear_grad()
Example #27
def test_sgd_momentum(monkeypatch, trace_mode, inplace_mode):
    with monkeypatch.context() as mk:
        mk.setenv("MEGENGINE_INPLACE_UPDATE", str(int(inplace_mode)))

        def train_func(data, *, model=None, optim=None, gm=None):
            optim.clear_grad()
            with gm:
                loss = net(data)
                gm.backward(loss)
            optim.step()
            return loss

        if trace_mode is not None:
            train_func = trace(symbolic=trace_mode)(train_func)

        def eval_func(data, *, model=None, optim=None, gm=None):
            loss = net(data)
            return loss

        if trace_mode is not None:
            eval_func = trace(symbolic=trace_mode)(eval_func)

        net = Simple()
        optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
        gm = ad.GradManager().attach(net.parameters())
        data = tensor([2.34])
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do 3 inference steps
        for _ in range(3):
            loss = eval_func(data)
            np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34),
                                           5)
            np.testing.assert_almost_equal(
                optim._state[net.a]["momentum_buffer"].numpy(), 2.34)

        # do one training step
        train_func(data, model=net, optim=optim, gm=gm)
        np.testing.assert_almost_equal(loss.numpy(), 2.34 * (1.23 - 2.34), 5)
        np.testing.assert_almost_equal(
            optim._state[net.a]["momentum_buffer"].numpy(), 0.9 * 2.34 + 2.34,
            5)
Example #28
def test_advance_indexing():
    net = Simple()

    gm = ad.GradManager().attach(net.parameters())
    optim = optimizer.SGD(net.parameters(), lr=1.0)
    optim.clear_grad()

    dshape = (10, 10)
    raw_data = np.arange(100).reshape(dshape).astype(np.float32)
    raw_mask = (np.random.random_sample(dshape) > 0.5).astype(np.bool_)
    data = tensor(raw_data)
    mask = tensor(raw_mask)
    answer = 1.0 - raw_data[raw_mask].sum()
    with gm:
        loss = net(data, mask).sum()
        gm.backward(loss)
    optim.step()
    np.testing.assert_almost_equal(net.a.numpy(),
                                   np.array([answer]).astype(np.float32))
Example #29
def test_elemwise_fuse_in_grad(trace_mode):
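    # compute row-wise L2 norms of w inside a traced function with opt_level=2 to exercise
    # elemwise fusion in the backward pass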
    w = Parameter(np.ones([4, 6]), dtype="float32")

    gm = GradManager().attach(w)
    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)

    # explicitly declare opt_level as 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f():
        with gm:
            wm = F.sum(w**2, axis=1)**0.5
            loss = wm.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    for i in range(3):
        y = f()
        y.numpy()
Example #30
def test_clip_grad_value():
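    # after clipping, every gradient should equal the saved gradient clamped to [min_val, max_val]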
    net = Net()
    x = np.random.randn(10, 3, 224, 224).astype("float32")
    gm = ad.GradManager().attach(net.parameters())
    opt = optim.SGD(net.parameters(), 1e-3, momentum=0.9)
    with gm:
        y = net(mge.tensor(x))
        y = y.mean()
        gm.backward(y)
    save_grad_value(net)
    max_val = 5
    min_val = -2
    optim.clip_grad_value(net.parameters(), lower=min_val, upper=max_val)
    for param in net.parameters():
        np.testing.assert_almost_equal(
            param.grad.numpy(),
            np.maximum(np.minimum(param.grad_backup, max_val), min_val),
        )
    opt.step().clear_grad()