Example #1
0
    def test_multihead(self):
        '''Check L's MultiHeadAttention against torch.nn.MultiheadAttention
        on the same data, comparing both forward output and input gradient.'''
        embed_dim, num_heads = 16, 4
        raw = np.random.rand(8, 3, embed_dim)

        # Reference: run the torch module forward and backward.
        ref_in = torch.tensor(raw, dtype=torch.float32, requires_grad=True)
        ref_mod = torch.nn.MultiheadAttention(embed_dim, num_heads)
        ref_out, ref_att = ref_mod(ref_in, ref_in, ref_in)
        ref_out.mean().backward()

        print(ref_att.shape)

        # L implementation with weights copied from the torch module.
        lib_in = L.from_numpy(raw[...], requires_grad=True)
        lib_mod = L.nn.MultiHeadAttention(embed_dim, num_heads)

        # L stores projection weights transposed relative to torch, so each
        # of the three q/k/v chunks is transposed before concatenation.
        qkv_chunks = ref_mod.in_proj_weight.data.chunk(3)
        packed = torch.cat([c.t() for c in qkv_chunks], dim=0)
        lib_mod.in_proj_weight.storage[...] = packed.numpy()
        lib_mod.in_proj_bias.storage[...] = ref_mod.in_proj_bias.data.numpy()
        lib_mod.out_proj.weight.storage[...] = ref_mod.out_proj.weight.data.t().numpy()
        lib_mod.out_proj.bias.storage[...] = ref_mod.out_proj.bias.data.numpy()

        lib_out, _ = lib_mod(lib_in, lib_in, lib_in)
        lib_out.mean().backward()

        print(lib_mod.in_proj_weight.shape, ref_mod.in_proj_weight.shape)
        print('output', lib_out.shape)

        np.testing.assert_almost_equal(lib_out.data.storage,
                                       ref_out.data.numpy(),
                                       decimal=4)
        np.testing.assert_almost_equal(lib_in.grad.data.storage,
                                       ref_in.grad.data.numpy(),
                                       decimal=4)
Example #2
0
def train(model):
    '''Fit `model` to the module-level `_data`/`_label` arrays with a
    hand-rolled SGD loop (500 iterations, fixed learning rate).'''
    inputs = L.from_numpy(_data)
    targets = L.from_numpy(_label)
    step_size = 0.01

    for it in range(500):
        model.zero_grad()

        prediction = model(inputs)
        loss = ((prediction - targets) ** 2).mean()  # mean squared error
        loss.backward()

        # Manual SGD update on every parameter.
        for param in model.parameters():
            param.data -= param.grad * step_size

        if it % 200 == 0:
            print(it, loss.data)
Example #3
0
def test(model, test_loader):
    '''Evaluate `model` over every batch of `test_loader` and print the
    overall classification accuracy.'''
    model.eval()

    n_correct = 0
    for batch, target in test_loader:
        inputs = L.from_numpy(np.array(batch))
        target = np.array(target)

        predicted = model(inputs).data.argmax(axis=1)
        n_correct += (predicted == target).sum()

    total = len(test_loader.dataset)
    print('\nAccuracy: {}/{} ({:.0f}%)\n'.format(
        n_correct, total, 100. * n_correct / total))
Example #4
0
    def test_padding_functional(self):
        '''Compare L.nn.functional.zero_pad2d with torch.nn.ZeroPad2d for
        scalar, 2-element and 4-element padding specs (forward + gradient).'''
        for pad in (2, [2, 3], [2, 3, 4, 5]):
            ref = torch.rand(4, 5, 6, 7, requires_grad=True)
            ref_out = torch.nn.ZeroPad2d(padding=pad)(ref)

            mine = L.from_numpy(ref.data.numpy()[...], requires_grad=True)
            my_out = L.nn.functional.zero_pad2d(mine, pad)

            ref_out.mean().backward()
            my_out.mean().backward()

            np.testing.assert_almost_equal(my_out.data.numpy(),
                                           ref_out.data.numpy())
            np.testing.assert_almost_equal(mine.grad.numpy(),
                                           ref.grad.data.numpy())
Example #5
0
    def test_constantpadding_module(self):
        '''Compare L.nn.ConstantPad2d with torch.nn.ConstantPad2d for
        scalar, 2-element and 4-element padding specs (forward + gradient).'''
        for pad in (2, [2, 3], [2, 3, 4, 5]):
            ref = torch.rand(4, 5, 6, 7, requires_grad=True)
            mine = L.from_numpy(ref.data.numpy()[...], requires_grad=True)

            ref_out = torch.nn.ConstantPad2d(padding=pad, value=2.)(ref)
            my_out = L.nn.ConstantPad2d(padding=pad, value=2.)(mine)
            ref_out.mean().backward()
            my_out.mean().backward()

            np.testing.assert_almost_equal(my_out.data.numpy(),
                                           ref_out.data.numpy())
            np.testing.assert_almost_equal(mine.grad.numpy(),
                                           ref.grad.data.numpy())
Example #6
0
def train(args, model, train_loader, optimizer, epoch):
    '''Run one training epoch over `train_loader` with cross-entropy loss,
    logging progress every `args.log_interval` batches.'''
    model.train()

    dataset_size = len(train_loader.dataset)
    for batch_idx, (batch, target) in enumerate(train_loader):
        inputs = L.from_numpy(np.array(batch))
        target = np.array(target)
        # One-hot encode the integer class labels (10 classes).
        onehot = L.Variable(np.eye(10)[target])

        logits = model(inputs)
        loss = F.cross_entropy(logits, onehot)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval == 0:
            seen = batch_idx * inputs.shape[0]
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, dataset_size,
                100. * seen / dataset_size,
                loss.data))