예제 #1
0
def test_compile_multi_times_static():
    """Disabled test: compile the same static graph several times.

    The function returns immediately, so nothing below the ``return`` runs.
    The remaining body is kept only as a starting point for whoever rewrites
    (or removes) this test.
    """
    return  # XXX: rewrite or remove this test
    with Graph() as cg:
        cg.set_option("eager_evaluation", False)
        data_in = Input("data", shape=(2, 28))
        label_in = Input("label", shape=(2, ), dtype=np.int32)

        net = MLP()
        sgd = SGD(net.parameters(requires_grad=True), lr=0.01)

        prob = F.softmax(net(data_in))
        loss = F.square_loss(prob, label_in.reshape(2, 1))
        sgd.zero_grad()
        grads = sgd.backward(loss)
        sgd.step()

        # One inference-only function and one that also takes the gradients.
        infer_fn = compile(prob, None)
        train_fn = compile([prob, loss], grads, copy=True)

        data_np = np.random.random((2, 28)).astype(np.float32)
        label_np = np.random.randint(0, 10, (2, )).astype(np.float32)
        infer_out = infer_fn(data=data_np)
        train_out = train_fn(data=data_np, label=label_np)
        assertTensorClose(infer_out[0], train_out[0])

        # After another compile with copy=False, calling the first compiled
        # function is expected to raise.
        _ = compile([prob, loss], grads, copy=False)
        with pytest.raises(mgb.MegBrainError):
            infer_fn(data=data_np)
예제 #2
0
def test_multi_step_lr():
    """Check the per-group learning rate produced by ``MultiStepLR``.

    For each of 10 scheduler steps, every param group's ``lr`` is expected to
    equal the base rate multiplied by ``0.1`` once per milestone in
    ``[3, 6, 8]`` that is less than or equal to the current step index.
    """
    milestones = (3, 6, 8)

    net = MLP()
    sgd = SGD(net.parameters(), lr=0.01, momentum=0.9)
    scheduler = MultiStepLR(sgd, list(milestones))

    base_lr = np.array(0.01, dtype=np.float32)
    for epoch in range(10):
        # bisect_right counts the milestones that are <= epoch.
        decay_steps = bisect_right(milestones, epoch)
        expected_lr = (base_lr * 0.1**decay_steps).astype(np.float32)
        for group in sgd.param_groups:
            actual_lr = np.array(group["lr"], dtype=np.float32)
            assertTensorClose(actual_lr, expected_lr, max_err=5e-6)
        scheduler.step()
예제 #3
0
def test_dump_model():
    """Run an MLP forward pass and dump its output to a temporary file.

    The test has no explicit assertion; it passes as long as ``mge.dump``
    completes without raising.
    """
    shape = (2, 28)
    inp = tensor()
    inp.set_value(np.random.random(shape))
    output = MLP()(inp)
    # The temporary file is removed again when the context exits.
    with tempfile.NamedTemporaryFile() as tmp:
        mge.dump(output, tmp.name)
예제 #4
0
def test_load_quantized():
    """Check that a quantized MLP's state dict survives a save/load round trip.

    The network is converted with ``quantize_qat`` followed by ``quantize``,
    its two dense weights are replaced by qint8 parameters, and a reference
    prediction is taken. The state dict is then saved, the weights are
    overwritten with different qint8 scales, the saved state is loaded back,
    and the new prediction must match the reference.
    """
    shape = (2, 28)
    inp = tensor(np.random.random(shape),
                 dtype="float32").astype(mgb.dtype.qint8(0.1))

    net = MLP()
    quantize_qat(net)
    quantize(net)

    def as_qint8_param(weight, scale):
        # Cast ``weight`` to qint8 with the given scale and wrap the resulting
        # numpy array in a new Parameter.
        return Parameter(weight.astype(mgb.dtype.qint8(scale)).numpy())

    net.dense0.weight = as_qint8_param(net.dense0.weight, 0.001)
    net.dense1.weight = as_qint8_param(net.dense1.weight, 0.0002)
    net.eval()
    reference = net(inp)

    with BytesIO() as buf:
        mge.save(net.state_dict(), buf)
        buf.seek(0)
        restored_state = mge.load(buf)
        # change mlp weight, so that a successful reload is observable.
        net.dense0.weight = as_qint8_param(net.dense0.weight, 0.00001)
        net.dense1.weight = as_qint8_param(net.dense1.weight, 0.2)
        net.load_state_dict(restored_state)
        reloaded = net(inp)

    expected = reference.astype("float32").numpy()
    actual = reloaded.astype("float32").numpy()
    assertTensorClose(expected, actual, max_err=5e-6)
예제 #5
0
def test_sgd_simple():
    """Check plain SGD with weight decay against a numpy reference.

    For three iterations the gradients stored on the parameters are compared
    with ``F.grad``, then after ``step()`` every parameter must equal
    ``old * 0.999 - grad * 0.01`` (lr=0.01, weight_decay=0.1).
    """
    data, data_shape, label, label_shape = get_input()
    net = MLP()
    sgd = SGD(net.parameters(), lr=0.01, weight_decay=0.1)

    for step in range(3):
        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        loss = F.square_loss(net(data), label.reshape(-1, 1))

        # Alternate between clearing gradients via the optimizer (odd steps)
        # and via the module (even steps).
        clear_grads = sgd.zero_grad if step % 2 else net.zero_grad
        clear_grads()
        sgd.backward(loss)

        # Snapshot gradients and parameters before the update is applied.
        grad_before = TensorDict()
        param_before = TensorDict()
        for p in net.parameters():
            g = F.grad(loss, p, use_virtual_grad=False)
            assertTensorClose(g.numpy(), p.grad.numpy())
            grad_before[p] = np.copy(g.numpy())
            param_before[p] = np.copy(p.numpy())

        sgd.step()
        for p in net.parameters():
            expected = param_before[p] * 0.999 - grad_before[p] * 0.01
            assertTensorClose(p.numpy(), expected)
예제 #6
0
def test_sgd_momentum_static():
    """Check SGD with momentum when forward/backward run inside ``@trace``.

    A numpy momentum buffer (``slots``) is maintained per parameter with the
    reference rule ``slot = 0.9 * slot - 0.01 * grad``; after each
    ``opt.step()`` every parameter must equal its pre-step value plus the
    updated slot.
    """
    _, data_shape, _, label_shape = get_input()
    mlp = MLP()
    opt = SGD(mlp.parameters(), lr=0.01, momentum=0.9)

    @trace
    def f(data, label):
        pred = mlp(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        opt.zero_grad()
        opt.backward(loss)

    # Reference momentum buffers, one zero-initialised array per parameter.
    slots = TensorDict()
    for param in mlp.parameters():
        slots[param] = np.zeros(param.shape).astype(np.float32)

    for _ in range(3):
        f(
            np.random.random(data_shape).astype(np.float32),
            np.random.randint(0, 10, label_shape).astype(np.int32),
        )
        # Snapshot parameters and gradients before the update so that the
        # reference computation does not depend on whether step() leaves
        # ``param.grad`` untouched.
        orig_params = TensorDict()
        grads = TensorDict()
        for param in mlp.parameters():
            orig_params[param] = np.copy(param.numpy())
            grads[param] = np.copy(param.grad.numpy())
        opt.step()
        for param in mlp.parameters():
            slot = slots[param]
            slot *= 0.9
            slot -= grads[param] * 0.01
            assertTensorClose(param.numpy(), orig_params[param] + slot)
예제 #7
0
def test_update_lr():
    """Check that changing ``lr`` in ``param_groups`` affects later steps.

    After one step at lr=0.01, every group's learning rate is raised by 0.02.
    The following three steps must then move each parameter by
    ``-0.03 * grad``.
    """
    data, data_shape, label, label_shape = get_input()
    net = MLP()
    sgd = SGD(net.parameters(), lr=0.01)

    # One step with the initial learning rate.
    loss = F.square_loss(net(data), label.reshape(-1, 1))
    sgd.zero_grad()
    sgd.backward(loss)
    sgd.step()

    for group in sgd.param_groups:
        group["lr"] += 0.02

    for _ in range(3):
        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        loss = F.square_loss(net(data), label.reshape(-1, 1))
        sgd.zero_grad()
        sgd.backward(loss)

        # Gradients stored on the parameters must agree with F.grad.
        for p in net.parameters():
            g = F.grad(loss, p, use_virtual_grad=False)
            assertTensorClose(g.numpy(), p.grad.numpy())

        params_before = [np.copy(p.numpy()) for p in net.parameters()]
        sgd.step()
        for p, before in zip(net.parameters(), params_before):
            expected = before - p.grad.numpy() * 0.03
            assertTensorClose(p.numpy(), expected)
예제 #8
0
def test_compile_multi_times_eager():
    """Disabled test: compile twice from the same eagerly built expressions.

    The function returns immediately, so nothing below the ``return`` runs.
    The remaining body is kept only as a starting point for whoever rewrites
    (or removes) this test.
    """
    return  # XXX: rewrite or remove this test
    data_in = Input("data", shape=(2, 28))
    label_in = Input("label", shape=(2, ), dtype=np.int32)

    net = MLP()
    sgd = SGD(net.parameters(requires_grad=True), lr=0.01)

    prob = F.softmax(net(data_in))
    loss = F.square_loss(prob, label_in.reshape(2, 1))
    sgd.zero_grad()
    grads = sgd.backward(loss)
    sgd.step()

    # One inference-only function and one that also takes the gradients.
    infer_fn = compile(prob, None)
    train_fn = compile([prob, loss], grads, copy=False)
    for _ in range(3):
        data_np = np.random.random((2, 28)).astype(np.float32)
        label_np = np.random.randint(0, 10, (2, )).astype(np.float32)
        infer_out = infer_fn(data=data_np)
        train_out = train_fn(data=data_np, label=label_np)
        # Both functions must report the same prediction for the same input.
        assertTensorClose(infer_out[0], train_out[0])
예제 #9
0
def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
    """Drive an optimizer through eager and traced steps, checking each update.

    Args:
        opt_str: Attribute name looked up on ``optimizer`` to obtain the
            optimizer class.
        test_case: Keyword arguments forwarded to both the optimizer
            constructor and ``check_class``.
        check_class: Called as ``check_class(net, **test_case)``. The result is
            invoked after every step as
            ``checker(ori_params, net.parameters(), step)``; when ``update_lr``
            is true its ``lr`` attribute is incremented as well.
        update_lr: When true, the learning rate of every param group is raised
            by 0.01 at the second iteration (``i == 1``) of each phase.
    """
    num_iters = 3
    data, data_shape, label, label_shape = get_input()

    net = MLP()
    opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
    checker = check_class(net, **test_case)

    def bump_lr_if_requested(i):
        # Raise the learning rate on the optimizer and on the reference
        # checker together, so both keep using the same value.
        if not (update_lr and i == 1):
            return
        for group in opt.param_groups:
            group["lr"] += 0.01
        checker.lr += 0.01

    def snapshot_params():
        # Copy the current value of every parameter.
        saved = TensorDict()
        for p in net.parameters():
            saved[p] = np.copy(p.numpy())
        return saved

    steps_done = 0

    # eager graph
    for i in range(num_iters):
        bump_lr_if_requested(i)
        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        loss = F.square_loss(net(data), label.reshape(-1, 1))
        opt.zero_grad()
        opt.backward(loss)
        before = snapshot_params()
        opt.step()
        steps_done += 1
        checker(before, net.parameters(), steps_done)

    # static graph
    @trace
    def train_func(data, label):
        pred = net(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        opt.backward(loss)

    for i in range(num_iters):
        bump_lr_if_requested(i)
        # In this phase gradients are cleared outside the traced function.
        opt.zero_grad()
        before = snapshot_params()
        train_func(
            np.random.random(data_shape).astype(np.float32),
            np.random.randint(0, 10, label_shape).astype(np.int32),
        )
        opt.step()
        steps_done += 1
        checker(before, net.parameters(), steps_done)
예제 #10
0
def test_optimizer_serialization():
    """Check that an SGD optimizer's state survives a save/load round trip.

    One step is taken with the original optimizer (lr=0.01, momentum=0.9).
    Its state dict is saved and loaded into a second optimizer built with
    different hyper-parameters (lr=0.02, momentum=0.8). The next step of that
    second optimizer must still follow the reference update computed with
    0.01 and 0.9.
    """
    data, data_shape, label, label_shape = get_input()
    net = MLP()
    sgd = SGD(net.parameters(), lr=0.01, momentum=0.9)

    # Reference momentum buffers, one zero-initialised array per parameter.
    momentum_ref = TensorDict()
    for p in net.parameters():
        momentum_ref[p] = np.zeros(p.shape).astype(np.float32)

    def advance_reference(p):
        # Apply one reference momentum update in place and return the buffer.
        buf = momentum_ref[p]
        buf *= 0.9
        buf -= p.grad.numpy() * 0.01
        return buf

    loss = F.square_loss(net(data), label.reshape(-1, 1))
    sgd.zero_grad()
    sgd.backward(loss)
    sgd.step()
    for p in net.parameters():
        advance_reference(p)

    with BytesIO() as buf_io:
        save(sgd.state_dict(), buf_io)
        buf_io.seek(0)
        restored_state = load(buf_io)
        sgd_restored = SGD(net.parameters(), lr=0.02, momentum=0.8)
        sgd_restored.load_state_dict(restored_state)

        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        loss = F.square_loss(net(data), label.reshape(-1, 1))
        sgd_restored.zero_grad()
        sgd_restored.backward(loss)

        params_before = TensorDict()
        for p in net.parameters():
            params_before[p] = np.copy(p.numpy())
        sgd_restored.step()
        for p in net.parameters():
            before = params_before[p]
            delta = advance_reference(p)
            assertTensorClose(p.numpy(), before + delta)
예제 #11
0
def test_pickle_module():
    """Check that an MLP saved with ``mge.save`` predicts the same after load.

    The module is round-tripped once before and once after its first forward
    pass; both reloaded copies must produce the same output as the original.
    """
    shape = (2, 28)
    inp = tensor()
    inp.set_value(np.random.random(shape))
    net = MLP()

    def save_and_reload(module):
        # Serialize ``module`` into an in-memory buffer and load it back.
        with BytesIO() as buf:
            mge.save(module, buf)
            buf.seek(0)
            return mge.load(buf)

    # pickle before forward
    out_before_forward = save_and_reload(net)(inp)

    out_original = net(inp)

    # pickle after forward
    out_after_forward = save_and_reload(net)(inp)

    assertTensorClose(out_before_forward.numpy(),
                      out_original.numpy(),
                      max_err=5e-6)
    assertTensorClose(out_before_forward.numpy(),
                      out_after_forward.numpy(),
                      max_err=5e-6)
예제 #12
0
def test_state_dict():
    """Check ``load_state_dict`` with matching, extra and missing keys.

    A saved state dict with one extra key must load into a fresh MLP when
    ``strict=False`` and reproduce the original prediction. In the default
    mode, both an extra key and a missing key are expected to raise
    ``KeyError``.
    """
    shape = (2, 28)
    inp = tensor()
    inp.set_value(np.random.random(shape))
    source_net = MLP()
    expected = source_net(inp)

    with BytesIO() as buf:
        mge.save(source_net.state_dict(), buf)
        buf.seek(0)
        loaded = mge.load(buf)

        # An unknown key is accepted when strict checking is switched off.
        loaded["extra"] = None
        target_net = MLP()
        target_net.load_state_dict(loaded, strict=False)
        actual = target_net(inp)
        assertTensorClose(expected.numpy(), actual.numpy(), max_err=5e-6)

        # The same unknown key is rejected by the default call.
        with pytest.raises(KeyError):
            target_net.load_state_dict(loaded)

        # Dropping a key that the module has is rejected as well.
        del loaded["extra"]
        del loaded["dense0.bias"]
        with pytest.raises(KeyError):
            target_net.load_state_dict(loaded)
예제 #13
0
def test_adam():
    """Check the Adam optimizer against a numpy reference, eager and traced.

    First- and second-moment buffers are kept in numpy per parameter. After
    each ``opt.step()`` every parameter must equal its pre-step value minus
    ``0.01 * m_hat / (sqrt(v_hat) + eps)``, where ``m_hat`` and ``v_hat`` are
    the bias-corrected moments for the current step count. Three steps run
    eagerly, then three more run with forward/backward inside ``@trace``; the
    step count and the moment buffers carry over between the two phases.
    """
    data, data_shape, label, label_shape = get_input()
    mlp = MLP()
    beta0 = 0.8
    beta1 = 0.9
    eps = 1e-4
    opt = Adam(mlp.parameters(), lr=0.01, betas=(beta0, beta1), eps=eps)

    # Reference moment buffers, one zero-initialised array per parameter.
    m_slots = TensorDict()
    v_slots = TensorDict()
    for param in mlp.parameters():
        m_slots[param] = np.zeros(param.shape).astype(np.float32)
        v_slots[param] = np.zeros(param.shape).astype(np.float32)
    num_steps = 0

    def check_value(orig_params, step):
        # Advance the reference moments in place and compare each parameter
        # with the expected post-step value. ``orig_params`` holds the
        # pre-step parameter values and ``step`` is the 1-based count of
        # optimizer steps taken so far.
        for param in mlp.parameters():
            grad = param.grad.numpy()
            orig_param = orig_params[param]
            m = m_slots[param]
            v = v_slots[param]
            m *= beta0
            m += (1 - beta0) * grad
            v *= beta1
            v += (1 - beta1) * grad * grad
            # Bias-corrected moment estimates.
            m_hat = m / (1 - beta0**step)
            v_hat = v / (1 - beta1**step)
            update = m_hat / (np.sqrt(v_hat) + eps)
            assertTensorClose(param.numpy(), orig_param - 0.01 * update)

    def snapshot_params():
        # Copy the current value of every parameter.
        saved = TensorDict()
        for param in mlp.parameters():
            saved[param] = np.copy(param.numpy())
        return saved

    # eager
    for _ in range(3):
        data.set_value(np.random.random(data_shape).astype(np.float32))
        label.set_value(np.random.randint(0, 10, label_shape))
        pred = mlp(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        opt.zero_grad()
        opt.backward(loss)
        orig_params = snapshot_params()
        opt.step()
        num_steps += 1
        check_value(orig_params, num_steps)

    # static
    @trace
    def f(data, label):
        pred = mlp(data)
        loss = F.square_loss(pred, label.reshape(-1, 1))
        opt.backward(loss)

    for _ in range(3):
        # In this phase gradients are cleared outside the traced function.
        opt.zero_grad()
        orig_params = snapshot_params()
        f(
            np.random.random(data_shape).astype(np.float32),
            np.random.randint(0, 10, label_shape).astype(np.int32),
        )
        opt.step()
        num_steps += 1
        check_value(orig_params, num_steps)