Esempio n. 1
0
def test_grad_grad_viterbi(operator):
    """Run ``gradgradcheck`` on a ``Viterbi`` layer built with ``operator``.

    The potentials come from ``make_data(10)``; only the third returned
    value is used. ``gradgradcheck`` is called with its default settings.
    """
    _, _, potentials = make_data(10)

    # A singleton axis is inserted at position 1 before autograd is enabled
    # (presumably the batch axis expected by Viterbi -- TODO confirm).
    potentials = torch.from_numpy(potentials)[:, None, :, :]
    potentials.requires_grad_()

    layer = Viterbi(operator)
    gradgradcheck(layer, (potentials,))
Esempio n. 2
0
def test_grad_hessian_viterbi_two_samples(operator):
    """Check first- and second-order gradients on two padded sequences.

    Two potentials tensors are generated with ``make_data(10)`` and
    ``make_data(5)``, padded together with ``pad_sequence`` and passed to a
    ``Viterbi`` layer together with the lengths ``[10, 5]``. Both
    ``gradcheck`` and ``gradgradcheck`` are run with respect to the two
    unpadded inputs.
    """
    _, _, long_np = make_data(10)
    _, _, short_np = make_data(5)
    lengths = torch.LongTensor([10, 5])

    long_theta = torch.from_numpy(long_np)
    long_theta.requires_grad_()
    short_theta = torch.from_numpy(short_np)
    short_theta.requires_grad_()

    layer = Viterbi(operator)

    def padded_forward(first, second):
        # Padding happens inside the checked function so that the numerical
        # perturbations are applied to the unpadded inputs.
        return layer(pad_sequence([first, second]), lengths)

    inputs = (long_theta, short_theta)
    gradcheck(padded_forward, inputs)
    gradgradcheck(padded_forward, inputs)
Esempio n. 3
0
def test_viterbi_two_lengths(operator):
    """Decode two sequences of different lengths in one padded batch.

    After back-propagating the summed ``Viterbi`` output, the gradient of
    each unpadded input is summed over axis 2 and arg-maxed over axis 1;
    the result must equal the states returned by ``make_data``.
    """
    expected_long, _, long_np = make_data(10)
    expected_short, _, short_np = make_data(5)
    lengths = torch.LongTensor([10, 5])

    long_theta = torch.from_numpy(long_np)
    short_theta = torch.from_numpy(short_np)
    for leaf in (long_theta, short_theta):
        leaf.requires_grad_()

    padded = pad_sequence([long_theta, short_theta])

    layer = Viterbi(operator)
    total = layer(padded, lengths=lengths).sum()
    total.backward()

    # Gradients are read from the original leaves, not the padded tensor.
    path_long = torch.argmax(long_theta.grad.sum(dim=2), dim=1).numpy()
    path_short = torch.argmax(short_theta.grad.sum(dim=2), dim=1).numpy()
    assert np.all(path_long == expected_long)
    assert np.all(path_short == expected_short)
Esempio n. 4
0
def test_viterbi(operator):
    """Check that the ``Viterbi`` gradient recovers the generated states.

    The summed layer output is back-propagated; the gradient of the
    potentials, summed over axis 2 and arg-maxed over axis 1, must equal
    the states returned by ``make_data(10)``.
    """
    expected_states, _, theta_np = make_data(10)

    theta = torch.from_numpy(theta_np)
    theta.requires_grad_()

    layer = Viterbi(operator)
    # The layer receives a view with an extra singleton axis at position 1;
    # the gradient is still accumulated on the 3-axis leaf ``theta``.
    total = layer(theta[:, None, :, :]).sum()
    total.backward()

    path = torch.argmax(theta.grad.sum(dim=2), dim=1).numpy()
    assert np.all(path == expected_states)
Esempio n. 5
0
def test_packed_viterbi(operator):
    """Check ``PackedViterbi`` decoding on a packed batch of two copies.

    The potentials from ``make_data(10)`` are duplicated along a new axis 1,
    packed with lengths ``[10, 10]`` and fed to ``PackedViterbi``. The
    gradient of the first copy must decode to the generated states.
    """
    expected_states, _, theta_np = make_data(10)

    # Add a singleton axis at position 1 and tile it twice.
    batch = torch.from_numpy(theta_np)[:, None, :, :].repeat((1, 2, 1, 1))
    batch.requires_grad_()
    packed = pack_padded_sequence(batch, [10, 10])

    layer = PackedViterbi(operator)
    total = layer(packed).sum()
    total.backward()

    # Only index 0 along axis 1 is inspected.
    first_grad = batch.grad[:, 0]
    path = torch.argmax(first_grad.sum(dim=2), dim=1).numpy()
    assert np.all(path == expected_states)
Esempio n. 6
0
def test_viterbi(operator):
    """Compare the torch ``viterbi`` value and gradient with ``viterbi_grad``.

    ``viterbi_grad`` on the NumPy potentials provides the reference value
    and gradient; the torch result (summed) and its back-propagated
    gradient must match them to the default precision of the
    ``np.testing`` helpers.
    """
    _, _, theta_np = make_data(4)
    expected_value, expected_grad, _, _ = viterbi_grad(theta_np,
                                                       operator=operator)

    # The torch function is given an extra singleton axis at position 1.
    theta = torch.from_numpy(theta_np[:, None, :, :])
    theta.requires_grad_()
    total = viterbi(theta, operator=operator).sum()

    np.testing.assert_almost_equal(total.item(), expected_value)

    total.backward()

    # Drop the singleton axis again before comparing with the reference.
    np.testing.assert_array_almost_equal(theta.grad[:, 0].numpy(),
                                         expected_grad)
Esempio n. 7
0
def test_hessian_viterbi(operator):
    """Smoke-test double back-propagation through ``Viterbi``.

    The gradient of the layer output is computed with ``create_graph=True``,
    contracted with a random tensor and back-propagated again. Only the
    shape of the resulting ``theta.grad`` is asserted.
    """
    # Fix the torch RNG so that the random direction below is reproducible.
    torch.manual_seed(0)

    _, _, theta_np = make_data(10)
    theta_np /= 100  # in-place scaling of the array returned by make_data

    theta = torch.from_numpy(theta_np)[:, None, :, :]
    theta.requires_grad_()

    layer = Viterbi(operator)
    value = layer(theta)
    (first_order,) = torch.autograd.grad(value, (theta,), create_graph=True)
    direction = torch.randn_like(first_order)
    contracted = (first_order * direction).sum()
    contracted.backward()

    assert theta.grad.shape == (10, 1, 3, 3)
Esempio n. 8
0
def test_viterbi_grad(operator):
    """Check ``viterbi_grad`` against finite differences of ``viterbi_value``.

    ``check_grad`` returns the discrepancy between the analytical and the
    numerical gradient; it must be below ``1e-4`` for ``'sparsemax'`` and
    below ``1e-6`` for any other operator.
    """
    _, _, theta = make_data()
    theta /= 100  # in-place scaling of the array returned by make_data
    shape = theta.shape

    def value_of(flat):
        return viterbi_value(flat.reshape(shape), operator=operator)

    def gradient_of(flat):
        _, gradient, _, _ = viterbi_grad(flat.reshape(shape),
                                         operator=operator)
        return gradient.ravel()

    # check_grad does not work with ndarray of dim > 2, so the potentials
    # are flattened here and reshaped inside the two callbacks.
    err = check_grad(value_of, gradient_of, theta.ravel())
    tolerance = 1e-4 if operator == 'sparsemax' else 1e-6
    assert err < tolerance
Esempio n. 9
0
def test_viterbi_grad(operator):
    """Compare the torch Hessian-vector product with ``viterbi_hessian_prod``.

    The direction is a one-hot array with a single ``1`` at index
    ``[1, 2, 1]``. The reference product comes from the NumPy
    ``viterbi_hessian_prod``; the torch product is obtained by
    back-propagating ``sum(Z * viterbi_decode(theta))``.
    """
    _, _, theta_np = make_data(4)
    theta_np = theta_np / 100

    direction_np = np.zeros_like(theta_np)
    direction_np[1, 2, 1] = 1

    _, expected = viterbi_hessian_prod(theta_np, direction_np,
                                       operator=operator)

    # Both tensors get a singleton axis at position 1 for the torch API.
    direction = torch.from_numpy(direction_np[:, None, :, :])
    theta = torch.from_numpy(theta_np[:, None, :, :])
    theta.requires_grad_()
    decoded = viterbi_decode(theta, operator=operator)

    contracted = (direction * decoded).sum()
    contracted.backward()

    # Drop the singleton axis again before comparing with the reference.
    actual = theta.grad[:, 0].numpy()
    np.testing.assert_array_almost_equal(actual, expected)
Esempio n. 10
0
def test_viterbi_hessian(operator):
    """Check ``viterbi_hessian_prod`` against finite differences.

    The scalar function ``sum(viterbi_grad(X) * Z)`` is differentiated
    numerically by ``check_grad`` and compared with the analytical
    Hessian-vector product returned by ``viterbi_hessian_prod(X, Z)``.
    The discrepancy must be below ``1e-4`` for ``'sparsemax'`` and below
    ``1e-6`` for any other operator.
    """
    _, _, theta = make_data()

    theta /= 100  # in-place scaling of the array returned by make_data
    shape = theta.shape

    # Draw the probe direction from a locally seeded generator so the test
    # is reproducible and does not touch the global NumPy RNG state.
    rng = np.random.RandomState(0)
    Z = rng.randn(*shape)

    def func(X):
        _, grad_value, _, _ = viterbi_grad(X.reshape(shape),
                                           operator=operator)
        return np.sum(grad_value * Z)

    def grad(X):
        _, H = viterbi_hessian_prod(X.reshape(shape), Z, operator=operator)
        return H.ravel()

    # check_grad does not work with ndarray of dim > 2, so the potentials
    # are flattened here and reshaped inside the two callbacks.
    err = check_grad(func, grad, theta.ravel())
    tolerance = 1e-4 if operator == 'sparsemax' else 1e-6
    assert err < tolerance
Esempio n. 11
0
def test_viterbi(operator):
    """Check that the NumPy ``viterbi_grad`` gradient decodes the states.

    The gradient returned for ``make_data(100)`` is summed over axis 2 and
    arg-maxed over axis 1; the result must equal the generated states.
    """
    expected_states, _, theta = make_data(100)
    # Only the second returned value (the gradient) is needed here.
    _, gradient, _, _ = viterbi_grad(theta, operator=operator)
    path = gradient.sum(axis=2).argmax(axis=1)
    assert np.all(path == expected_states)