Example 1
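The snippets on this page come from the test suite and a benchmark script of a differentiable Viterbi layer; the surrounding module is not shown. As a rough guide, they assume imports along the following lines. The paths for the project-specific names Viterbi, LinearPotential, make_data and pad_sequence are placeholders, and `operator` is supplied by pytest parametrization (e.g. 'sparsemax', as in the benchmark at the bottom of the page):

import numpy as np
import torch
from torch.autograd import gradcheck, gradgradcheck

# Placeholder import paths -- the package under test provides these names:
# from some_package.viterbi import Viterbi, LinearPotential
# from some_package.testing import make_data, pad_sequence
# pad_sequence may simply be torch.nn.utils.rnn.pad_sequence, which stacks
# variable-length sequences into a (max_length, batch, ...) tensor.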
def test_grad_grad_viterbi(operator):
    states, emissions, theta = make_data(10)

    theta = torch.from_numpy(theta)
    theta = theta[:, None, :, :]  # add a batch dimension of size 1
    theta.requires_grad_()

    viterbi = Viterbi(operator)
    # Numerically check second-order gradients of the Viterbi value.
    gradgradcheck(viterbi, (theta, ))
Example 2
def test_viterbi(operator):
    states, emissions, theta = make_data(10)

    theta = torch.from_numpy(theta)
    theta.requires_grad_()
    W = theta[:, None, :, :]

    viterbi = Viterbi(operator)
    v = viterbi(W)
    s = v.sum()
    s.backward()
    # The gradient of the Viterbi value w.r.t. theta acts as a (soft) indicator
    # of the optimal path: summing over the second state axis and taking the
    # argmax per time step recovers the decoded state sequence.
    decoded = torch.argmax(theta.grad.sum(dim=2), dim=1).numpy()
    assert np.all(decoded == states)
Example 3
def test_hessian_viterbi(operator):
    torch.manual_seed(0)

    states, emissions, theta = make_data(10)
    theta /= 100

    theta = torch.from_numpy(theta)
    theta = theta[:, None, :, :]
    theta.requires_grad_()

    viterbi = Viterbi(operator)
    ll = viterbi(theta)
    g, = torch.autograd.grad(ll, (theta, ), create_graph=True)
    # Backpropagating through g . z accumulates the Hessian-vector product
    # H z into theta.grad.
    z = torch.randn_like(g)
    s = torch.sum(g * z)
    s.backward()

    assert theta.grad.shape == (10, 1, 3, 3)
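make_data itself is not shown on this page. From the way it is used (theta has shape (length, 3, 3) before the batch axis is added, and the state sequence can be read back from the argmax of the gradient), it plausibly returns a random state sequence, random emissions, and a double-precision potential tensor biased towards that sequence. A minimal sketch under those assumptions, for illustration only:

import numpy as np

def make_data(length, n_states=3, n_features=5, seed=0):
    # Hypothetical stand-in: random states, random emissions, and potentials
    # theta[t, i, j] with a large bonus for being in state states[t] at time t,
    # so that the decoded path matches `states`.
    rng = np.random.RandomState(seed)
    states = rng.randint(n_states, size=length)
    emissions = rng.randn(length, n_features)
    theta = 0.01 * rng.randn(length, n_states, n_states)
    theta[np.arange(length), states, :] += 10.0
    return states, emissions, theta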
Example 4
def test_grad_hessian_viterbi_two_samples(operator):
    states1, emissions1, theta1 = make_data(10)
    states2, emissions2, theta2 = make_data(5)
    lengths = torch.LongTensor([10, 5])

    theta1 = torch.from_numpy(theta1)
    theta2 = torch.from_numpy(theta2)

    theta1.requires_grad_()
    theta2.requires_grad_()

    viterbi = Viterbi(operator)

    def func(theta1_, theta2_):
        # Pad the two sequences to a common length and pass the true lengths.
        W = pad_sequence([theta1_, theta2_])
        return viterbi(W, lengths)

    gradcheck(func, (theta1, theta2))
    gradgradcheck(func, (theta1, theta2))
Example 5
def test_viterbi_two_lengths(operator):
    states1, emissions1, theta1 = make_data(10)
    states2, emissions2, theta2 = make_data(5)
    lengths = torch.LongTensor([10, 5])

    theta1 = torch.from_numpy(theta1)
    theta2 = torch.from_numpy(theta2)

    theta1.requires_grad_()
    theta2.requires_grad_()
    W = pad_sequence([theta1, theta2])

    viterbi = Viterbi(operator)
    v = viterbi(W, lengths=lengths)
    s = v.sum()
    s.backward()
    decoded1 = torch.argmax(theta1.grad.sum(dim=2), dim=1).numpy()
    decoded2 = torch.argmax(theta2.grad.sum(dim=2), dim=1).numpy()
    assert np.all(decoded1 == states1)
    assert np.all(decoded2 == states2)
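The remaining snippet is not a test but a standalone benchmark: it draws random features, maps them to potentials with LinearPotential, and profiles the double-backward pass of a Hessian-vector product through the Viterbi layer, on GPU when one is available.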
length = 100
batch_size = 256
n_targets = 32
n_features = 100
gpu = True
operator = 'sparsemax'

if gpu and torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

X = torch.FloatTensor(length, batch_size, n_features).uniform_()

viterbi = Viterbi(operator=operator)
linear_potential = LinearPotential(n_features, n_targets)
theta = linear_potential(X)
theta = theta.detach()
theta = theta.to(device)
theta.requires_grad_()
z = torch.randn_like(theta)

value = torch.sum(viterbi(theta))
g, = torch.autograd.grad(value, (theta, ), create_graph=True)
s = torch.sum(g * z)
# Profile the double-backward pass that computes the Hessian-vector product;
# only enable CUDA profiling when a GPU is actually being used.
with torch.autograd.profiler.profile(use_cuda=(device.type == 'cuda')) as prof:
    s.backward()
print('Value', value.item())
# After s.backward(), theta.grad holds the Hessian-vector product H z.
print('|g|', torch.sum(torch.abs(theta.grad)).item())
prof.export_chrome_trace('prof.txt')
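The exported trace is written in Chrome trace format; loading prof.txt into chrome://tracing (or Perfetto) shows where time is spent inside the double-backward pass.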