import numpy as np
import torch

import L                        # the autograd library under test
import L.nn.functional as F     # assumed module path for the `F` used in train() below


def test_multihead(self):
    '''Compare L.nn.MultiHeadAttention with torch.nn.MultiheadAttention
    on identical inputs and weights, checking outputs and input gradients.'''
    data = np.random.rand(8, 3, 16)
    dt = torch.tensor(data, requires_grad=True, dtype=torch.float32)
    mt = torch.nn.MultiheadAttention(16, 4)
    outt, attt = mt(dt, dt, dt)
    outt.mean().backward()
    print(attt.shape)

    dl = L.from_numpy(data[...], requires_grad=True)
    ml = L.nn.MultiHeadAttention(16, 4)
    # torch packs q/k/v into one (3 * embed_dim, embed_dim) matrix applied as
    # x @ W.T; transpose each chunk so the copied weights match L's layout.
    ws = torch.cat([x.t() for x in mt.in_proj_weight.data.chunk(3)], dim=0)
    ml.in_proj_weight.storage[...] = ws.numpy()
    ml.in_proj_bias.storage[...] = mt.in_proj_bias.data.numpy()
    ml.out_proj.weight.storage[...] = mt.out_proj.weight.data.t().numpy()
    ml.out_proj.bias.storage[...] = mt.out_proj.bias.data.numpy()
    outl, _ = ml(dl, dl, dl)
    outl.mean().backward()
    print(ml.in_proj_weight.shape, mt.in_proj_weight.shape)
    print('output', outl.shape)

    np.testing.assert_almost_equal(outl.data.storage, outt.data.numpy(), decimal=4)
    np.testing.assert_almost_equal(dl.grad.data.storage, dt.grad.data.numpy(), decimal=4)

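# Why the per-chunk transpose in test_multihead: torch stores the packed
# q/k/v projection as (3 * embed_dim, embed_dim) and applies it as x @ W.T,
# while L -- judging by the chunk-and-transpose above -- applies weights as
# x @ W. A self-contained numpy sketch of that equivalence (illustrative
# only, not part of the test suite):
def _sketch_in_proj_layout():
    E = 16
    W = np.random.rand(3 * E, E).astype(np.float32)   # torch's packed layout
    x = np.random.rand(8, E).astype(np.float32)
    q_ref = x @ W[:E].T        # the q projection as torch computes it
    Wq_stored = W[:E].T        # chunk 0, transposed, as the test stores it in L
    assert np.allclose(x @ Wq_stored, q_ref)
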
def train(model):
    '''Fit `model` to the module-level `_data`/`_label` arrays with plain SGD.'''
    data = L.from_numpy(_data)
    label = L.from_numpy(_label)
    lr = 0.01
    for i in range(500):
        model.zero_grad()
        out = model(data)
        loss = ((out - label) ** 2).mean()   # mean squared error
        loss.backward()
        # manual SGD step: move each parameter against its gradient
        for p in model.parameters():
            p.data -= p.grad * lr
        if i % 200 == 0:
            print(i, loss.data)

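# A minimal usage sketch for train(). The shapes of `_data`/`_label` and an
# `L.nn.Linear` constructor mirroring torch.nn.Linear are assumptions, used
# only to show the calling convention:
def _sketch_train_usage():
    global _data, _label
    _data = np.random.rand(32, 4).astype(np.float32)    # toy regression inputs
    _label = np.random.rand(32, 1).astype(np.float32)   # toy targets
    model = L.nn.Linear(4, 1)                           # assumed API
    train(model)
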
def test(model, test_loader):
    '''Evaluate `model` on `test_loader` and print the accuracy.'''
    model.eval()
    correct = 0
    for data, label in test_loader:
        # the loader yields torch tensors; route them through numpy for L
        data = L.from_numpy(np.array(data))
        label = np.array(label)
        output = model(data)
        correct += (output.data.argmax(axis=1) == label).sum()
    print('\nAccuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def test_padding_functional(self):
    '''Check L.nn.functional.zero_pad2d against torch.nn.ZeroPad2d
    for int, 2-element, and 4-element padding specs.'''
    for padding in [2, [2, 3], [2, 3, 4, 5]]:
        t = torch.rand(4, 5, 6, 7, requires_grad=True)
        ot = torch.nn.ZeroPad2d(padding=padding)(t)
        v = L.from_numpy(t.data.numpy()[...], requires_grad=True)
        ov = L.nn.functional.zero_pad2d(v, padding)
        ot.mean().backward()
        ov.mean().backward()
        np.testing.assert_almost_equal(ov.data.numpy(), ot.data.numpy())
        np.testing.assert_almost_equal(v.grad.numpy(), t.grad.data.numpy())

def test_constantpadding_module(self):
    '''Check L.nn.ConstantPad2d against torch.nn.ConstantPad2d
    with a non-zero fill value.'''
    for padding in [2, [2, 3], [2, 3, 4, 5]]:
        t = torch.rand(4, 5, 6, 7, requires_grad=True)
        v = L.from_numpy(t.data.numpy()[...], requires_grad=True)
        ot = torch.nn.ConstantPad2d(padding=padding, value=2.)(t)
        ov = L.nn.ConstantPad2d(padding=padding, value=2.)(v)
        ot.mean().backward()
        ov.mean().backward()
        np.testing.assert_almost_equal(ov.data.numpy(), ot.data.numpy())
        np.testing.assert_almost_equal(v.grad.numpy(), t.grad.data.numpy())

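# The padding convention both tests above rely on: an int pads all four
# sides, a 4-element spec is (left, right, top, bottom) over the last two
# dimensions, and a 2-element spec appears to pad only the last (width)
# dimension. A numpy shape check for the 4-element case (illustrative only):
def _sketch_pad2d_shapes():
    x = np.zeros((4, 5, 6, 7))
    left, right, top, bottom = 2, 3, 4, 5
    padded = np.pad(x, ((0, 0), (0, 0), (top, bottom), (left, right)))
    assert padded.shape == (4, 5, 6 + top + bottom, 7 + left + right)  # (4, 5, 15, 12)
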
def train(args, model, train_loader, optimizer, epoch):
    '''Run one epoch of optimizer-driven training with cross-entropy loss.'''
    model.train()
    for _idx, (data, label) in enumerate(train_loader):
        data = L.from_numpy(np.array(data))
        label = np.array(label)
        label = L.Variable(np.eye(10)[label])   # one-hot encode the class targets
        output = model(data)
        loss = F.cross_entropy(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if _idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, _idx * data.shape[0], len(train_loader.dataset),
                100. * _idx * data.shape[0] / len(train_loader.dataset),
                loss.data))

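# A hypothetical wiring of train() and test() above; the torchvision MNIST
# loaders are real APIs, but `L.optim.SGD` and the `log_interval` argparse
# field are assumptions about this codebase:
def _sketch_mnist_wiring(model):
    import argparse
    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms

    args = argparse.Namespace(log_interval=100)
    to_tensor = transforms.ToTensor()
    train_loader = DataLoader(
        datasets.MNIST('./data', train=True, download=True, transform=to_tensor),
        batch_size=64, shuffle=True)
    test_loader = DataLoader(
        datasets.MNIST('./data', train=False, transform=to_tensor),
        batch_size=64)
    optimizer = L.optim.SGD(model.parameters(), lr=0.01)   # assumed optimizer API
    for epoch in range(1, 4):
        train(args, model, train_loader, optimizer, epoch)
        test(model, test_loader)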