예제 #1
0
    def test_viterbi_path_grad(self):
        """Gradients of viterbi_path: analytic check, then numerical check."""
        graph_text = [
            "0 1",
            "3 4",
            "0 1 0 0 2",
            "0 2 1 1 1",
            "1 2 0 0 2",
            "2 3 0 0 1",
            "2 3 1 1 3",
            "1 4 0 0 2",
            "2 4 1 1 3",
            "3 4 0 0 2",
        ]
        g = create_graph_from_text(graph_text)
        gtn.backward(gtn.viterbi_path(g))
        # Arcs on the best-scoring path get gradient 1.0, all others 0.0.
        self.assertEqual(
            g.grad().weights_to_list(),
            [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0])
        g.zero_grad()

        def forward_fn(g):
            # Union of three identical viterbi paths fed to forward_score.
            return gtn.forward_score(
                gtn.union([gtn.viterbi_path(g) for _ in range(3)]))

        gtn.backward(forward_fn(g))
        self.assertTrue(numerical_grad_check(forward_fn, g, 1e-3, 1e-5))
예제 #2
0
        def test_closure_grad(self):
            """Numerical gradient check through closure composed with a chain."""
            g1 = gtn.Graph()
            g1.add_node(True)
            g1.add_node(False, True)
            g1.add_arc(0, 1, 0, 0, 1.3)
            g1.add_arc(1, 1, 1, 1, 2.1)

            # A 4-step linear chain accepting any label sequence over {0, 1}.
            g2 = gtn.Graph()
            g2.add_node(True)
            for _ in range(3):
                g2.add_node()
            g2.add_node(False, True)
            for src in range(4):
                for label in (0, 1):
                    g2.add_arc(src, src + 1, label)

            gtn.backward(gtn.forward_score(gtn.compose(closure(g1), g2)))

            def forward_fn(g, g2=g2):
                return gtn.forward_score(gtn.compose(closure(g), g2))

            self.assertTrue(numerical_grad_check(forward_fn, g1, 1e-3, 1e-3))
예제 #3
0
 def process(b):
     """Backprop the b-th loss scaled by scales[b]; collect emission grads.

     Relies on enclosing-scope variables: scales, losses, emissions_graphs,
     calc_emissions, input_grad, T, C, make_scalar_graph.
     """
     # Scale factor is passed to backward as a scalar graph of deltas.
     scale = make_scalar_graph(scales[b])
     gtn.backward(losses[b], scale)
     emissions = emissions_graphs[b]
     if calc_emissions:
         # Copy the emission-arc gradients back into the torch output buffer.
         grad = emissions.grad().weights_to_numpy()
         input_grad[b] = torch.tensor(grad).view(1, T, C)
예제 #4
0
 def process(b):
     """Backprop the b-th loss (graph not retained) and write scaled grads.

     Uses enclosing-scope variables: ilens, losses, emissions_graphs,
     input_grad, scales, C.
     """
     # T is the true (unpadded) input length for this batch element.
     T = ilens[b]
     gtn.backward(losses[b], False)
     emissions = emissions_graphs[b]
     grad = emissions.grad().weights_to_numpy()
     # Only the first T frames are valid; scale by the per-sample factor.
     input_grad[b][:T] = torch.from_numpy(grad).view(1, T,
                                                     C) * scales[b]
    def test_simple_decomposition(self):
        """Transducer loss matches a hand-built alignment graph.

        Builds the alignment graph for label sequence [a, b, a] over a
        token set containing multi-grapheme tokens, and checks that the
        Transducer's loss and input gradients agree with the explicitly
        constructed ``forward(emissions) - forward(emissions ∘ alignments)``
        loss.
        """
        T = 5
        tokens = ["a", "b", "ab", "ba", "aba"]
        scores = torch.randn((1, T, len(tokens)), requires_grad=True)
        labels = [[0, 1, 0]]
        transducer = Transducer(tokens=tokens,
                                graphemes_to_idx={
                                    "a": 0,
                                    "b": 1
                                })

        # Hand construct the alignment graph with all of the decompositions
        alignments = gtn.Graph(False)
        alignments.add_node(True)

        # Add the path ['a', 'b', 'a']
        alignments.add_node()
        alignments.add_arc(0, 1, 0)
        alignments.add_arc(1, 1, 0)
        alignments.add_node()
        alignments.add_arc(1, 2, 1)
        alignments.add_arc(2, 2, 1)
        alignments.add_node(False, True)
        alignments.add_arc(2, 3, 0)
        alignments.add_arc(3, 3, 0)

        # Add the path ['a', 'ba']
        alignments.add_node(False, True)
        alignments.add_arc(1, 4, 3)
        alignments.add_arc(4, 4, 3)

        # Add the path ['ab', 'a']
        alignments.add_node()
        alignments.add_arc(0, 5, 2)
        alignments.add_arc(5, 5, 2)
        alignments.add_arc(5, 3, 0)

        # Add the path ['aba']
        alignments.add_node(False, True)
        alignments.add_arc(0, 6, 4)
        alignments.add_arc(6, 6, 4)

        emissions = gtn.linear_graph(T, len(tokens), True)

        # Share torch's storage directly with the emissions graph.
        emissions.set_weights(scores.data_ptr())
        expected_loss = gtn.subtract(
            gtn.forward_score(emissions),
            gtn.forward_score(gtn.intersect(emissions, alignments)),
        )

        loss = transducer(scores, labels)
        self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5)
        loss.backward()
        gtn.backward(expected_loss)

        # Gradients flowing back to the scores must match as well.
        expected_grad = torch.tensor(emissions.grad().weights_to_numpy())
        expected_grad = expected_grad.view((1, T, len(tokens)))
        self.assertTrue(
            torch.allclose(scores.grad, expected_grad, rtol=1e-4, atol=1e-5))
 def process(b):
     """Backprop per-(t, c) output deltas for sample b and scatter the
     resulting input gradients back into input_grad.

     Uses enclosing-scope variables: output_graphs, deltas, input_graphs,
     input_grad, kernel_size, stride, make_scalar_graph.
     """
     for t, window in enumerate(output_graphs[b]):
         for c, out in enumerate(window):
             delta = make_scalar_graph(deltas[b, t, c])
             gtn.backward(out, delta)
         # Gather this window's input gradient. This must run inside the
         # window loop: previously the accumulate line was dedented out of
         # the loop, so only the final window's gradient was ever applied
         # (and `t` was read after the loop ended).
         grad = (input_graphs[b][t].grad().weights_to_numpy().reshape(
             kernel_size, -1))
         input_grad[b, t * stride:t * stride + kernel_size] += grad
예제 #7
0
 def test_backward_calls_once(self):
     """A fresh forward + backward reproduces the first backward's grad."""
     a = gtn.scalar_graph(1)
     b = gtn.scalar_graph(1)
     out = gtn.add(a, b)
     # First backward goes through the list-based (parallel) entry point.
     gtn.backward([out])
     first_grad = out.grad()
     # Rebuild the same sum and backprop through the scalar entry point.
     out = gtn.add(a, b)
     gtn.backward(out)
     self.assertTrue(gtn.equal(first_grad, out.grad()))
예제 #8
0
 def process(b):
     """Backprop the b-th loss and collect emission/transition gradients.

     Uses enclosing-scope variables: losses, emissions_graphs,
     transitions_graphs, input_grad, transitions_grad, scales, T, C.
     """
     gtn.backward(losses[b], False)
     emissions = emissions_graphs[b]
     transitions = transitions_graphs[b]
     if input_grad is not None:
         # Emission gradients, scaled per sample.
         grad = emissions.grad().weights_to_numpy()
         input_grad[b] = torch.from_numpy(grad).view(1, T,
                                                     C) * scales[b]
     if transitions_grad is not None:
         # Transition gradients; (C + 1) rows presumably include the
         # start/blank transition row — confirm against the model layout.
         grad = transitions.grad().weights_to_numpy()
         transitions_grad[b] = (
             torch.from_numpy(grad).view(1, C + 1, C) * scales[b])
예제 #9
0
    def test_autograd(self):
        """Autograd lifecycle: default graph freeing, retain, explicit deltas."""
        # The graph is not retained by default
        g1 = gtn.scalar_graph(3.0)
        g2 = gtn.scalar_graph(3.0)

        result = gtn.add(g1, g2)
        gtn.backward(result)
        # Cannot backward twice when graph is cleared.
        self.assertRaises(ValueError, gtn.backward, result)

        # Check the graph is retained
        g1.zero_grad()
        g2.zero_grad()
        result = gtn.add(g1, g2)
        gtn.backward(result, True)
        result.zero_grad()
        g1.zero_grad()
        g2.zero_grad()
        # Second backward works because retain=True kept the tape alive.
        gtn.backward(result, True)
        self.assertEqual(g1.grad().item(), 1.0)
        self.assertEqual(g2.grad().item(), 1.0)

        # Check that provided input gradients are used.
        g1.zero_grad()
        g2.zero_grad()
        result = gtn.add(g1, g2)
        # Single-arc graph whose weight (7.0) seeds the backward pass.
        deltas = gtn.Graph()
        deltas.add_node(True)
        deltas.add_node(False, True)
        deltas.add_arc(0, 1, 0, 0, 7.0)
        gtn.backward(result, deltas)
        self.assertEqual(g1.grad().item(), 7.0)
        self.assertEqual(g2.grad().item(), 7.0)
예제 #10
0
 def test_grad_available(self):
     """is_grad_available flips to True only after a backward pass."""
     g = gtn.Graph()
     g.add_node(True)
     g.add_node()
     g.add_node(False, True)
     # Two steps, three parallel arcs each; weight is label + 1.
     for src in (0, 1):
         for label in range(3):
             g.add_arc(src, src + 1, label, label, label + 1)
     self.assertFalse(g.is_grad_available())
     gtn.backward(gtn.forward_score(g))
     self.assertTrue(g.is_grad_available())
예제 #11
0
    def test_retain_graph(self):
        """backward() frees the autograd tape unless retain=True is passed."""
        # The graph is not retained by default
        g1 = gtn.Graph(True)
        g1.add_node(True)
        g1.add_node(False, True)
        g1.add_arc(0, 1, 0, 0, 3.0)

        g2 = gtn.Graph(True)
        g2.add_node(True)
        g2.add_node(False, True)
        g2.add_arc(0, 1, 0, 0, 3.0)

        result = gtn.add(g1, g2)
        gtn.backward(result)
        # Second backward on a freed tape must raise.
        with self.assertRaises(ValueError):
            gtn.backward(result)

        # Check the graph is retained
        g1.zero_grad()
        g2.zero_grad()
        result = gtn.add(g1, g2)
        gtn.backward(result, True)
        g1.zero_grad()
        g2.zero_grad()
        result.zero_grad()
        # With retain=True the tape survives for a second backward.
        gtn.backward(result, True)
        self.assertTrue(g1.grad().item() == 1.0)
        self.assertTrue(g2.grad().item() == 1.0)
예제 #12
0
    def test_viterbi_score_grad(self):
        """Gradients of viterbi_score on several graph topologies.

        The gradient of the viterbi score is the indicator of the
        best-scoring path: 1.0 on each arc of that path, 0.0 elsewhere.
        """
        g = gtn.Graph()
        g.add_node(True)
        g.add_node()
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, 1)
        g.add_arc(0, 1, 1, 1, 2)
        g.add_arc(0, 1, 2, 2, 3)
        g.add_arc(1, 2, 0, 0, 1)
        g.add_arc(1, 2, 1, 1, 2)
        g.add_arc(1, 2, 2, 2, 3)
        gtn.backward(gtn.viterbi_score(g))
        # Best path takes the weight-3 arc in each step.
        expected = [0.0, 0.0, 1.0, 0.0, 0.0, 1.0]
        self.assertEqual(g.grad().weights_to_list(), expected)

        # Handle two start nodes
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, -5)
        g.add_arc(0, 2, 0, 0, 1)
        g.add_arc(1, 2, 0, 0, 2)
        gtn.backward(gtn.viterbi_score(g))
        # Starting at node 1 and taking the weight-2 arc wins.
        expected = [0.0, 0.0, 1.0]
        self.assertEqual(g.grad().weights_to_list(), expected)

        # Handle two accept nodes
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(False, True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, 2)
        g.add_arc(0, 2, 0, 0, 2)
        g.add_arc(1, 2, 0, 0, 2)
        gtn.backward(gtn.viterbi_score(g))
        # The two-arc route 0 -> 1 -> 2 (total 4) beats the direct arc.
        expected = [1.0, 0.0, 1.0]
        self.assertEqual(g.grad().weights_to_list(), expected)

        # A more complex test case
        g_str = [
            "0 1",
            "3 4",
            "0 1 0 0 2",
            "0 2 1 1 1",
            "1 2 0 0 2",
            "2 3 0 0 1",
            "2 3 1 1 1",
            "1 4 0 0 2",
            "2 4 1 1 3",
            "3 4 0 0 2",
        ]
        g = create_graph_from_text(g_str)
        gtn.backward(gtn.viterbi_score(g))
        # two possible paths with same viterbi score
        expected1 = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]
        expected2 = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0]
        self.assertTrue(g.grad().weights_to_list() == expected1
                        or g.grad().weights_to_list() == expected2)
예제 #13
0
    def test_clone_project_grad(self):
        """A clone has no grad until it is used; then it mirrors the source."""
        g1 = gtn.scalar_graph(3.0)
        g2 = gtn.scalar_graph(4.0)
        cloned = gtn.clone(g1)

        gtn.backward(gtn.add(g1, g2))
        # Cloned wasn't used in the computation, so asking for its grad raises.
        self.assertRaises(RuntimeError, cloned.grad)

        # Now use the clone in the computation instead of the original.
        g1.zero_grad()
        g2.zero_grad()
        gtn.backward(gtn.add(cloned, g2))
        self.assertTrue(gtn.equal(cloned.grad(), g1.grad()))
예제 #14
0
    def test_calc_grad(self):
        """The per-graph calc_grad flag overrides the constructor argument."""
        # Constructed without grad, then explicitly enabled.
        grad_on = gtn.Graph(False)
        grad_on.calc_grad = True
        grad_on.add_node(True)
        grad_on.add_node(False, True)
        grad_on.add_arc(0, 1, 1, 1, 1.0)

        # Constructed with grad, then explicitly disabled.
        grad_off = gtn.Graph(True)
        grad_off.calc_grad = False
        grad_off.add_node(True)
        grad_off.add_node(False, True)
        grad_off.add_arc(0, 0, 1, 1, 1.0)

        gtn.backward(gtn.add(grad_on, grad_off))

        # Only the grad-enabled graph received a gradient.
        self.assertTrue(grad_on.grad().item() == 1.0)
        with self.assertRaises(RuntimeError):
            grad_off.grad()
예제 #15
0
    def test_input_grad(self):
        """An explicit delta graph passed to backward seeds the gradients."""
        # Two identical single-arc graphs with weight 3.0.
        lhs = gtn.Graph(True)
        lhs.add_node(True)
        lhs.add_node(False, True)
        lhs.add_arc(0, 1, 0, 0, 3.0)

        rhs = gtn.Graph(True)
        rhs.add_node(True)
        rhs.add_node(False, True)
        rhs.add_arc(0, 1, 0, 0, 3.0)

        total = gtn.add(lhs, rhs)

        # Seed gradient of 7.0, supplied as a single-arc graph.
        seed = gtn.Graph()
        seed.add_node(True)
        seed.add_node(False, True)
        seed.add_arc(0, 1, 0, 0, 7.0)
        gtn.backward(total, seed)
        self.assertTrue(lhs.grad().item() == 7.0)
        self.assertTrue(rhs.grad().item() == 7.0)
예제 #16
0
    def test_scalar_ops(self):
        """Forward values and gradients of negate, add, and subtract."""
        g1 = gtn.Graph()
        g1.add_node(True)
        g1.add_node(False, True)
        g1.add_arc(0, 1, 0, 0, 1.0)

        # Test negate:
        res = gtn.negate(g1)
        self.assertEqual(res.item(), -1.0)
        gtn.backward(res)
        # d(-x)/dx = -1
        self.assertEqual(g1.grad().item(), -1.0)
        g1.zero_grad()

        g2 = gtn.Graph()
        g2.add_node(True)
        g2.add_node(False, True)
        g2.add_arc(0, 1, 0, 0, 3.0)

        # Test add:
        res = gtn.add(g1, g2)
        self.assertEqual(res.item(), 4.0)
        gtn.backward(res)
        # d(x + y)/dx = d(x + y)/dy = 1
        self.assertEqual(g1.grad().item(), 1.0)
        self.assertEqual(g2.grad().item(), 1.0)
        g1.zero_grad()
        g2.zero_grad()

        # Test subtract:
        res = gtn.subtract(g1, g2)
        self.assertEqual(res.item(), -2.0)
        gtn.backward(res)
        # d(x - y)/dx = 1, d(x - y)/dy = -1
        self.assertEqual(g1.grad().item(), 1.0)
        self.assertEqual(g2.grad().item(), -1.0)
예제 #17
0
    def test_scalar_ops_grad(self):
        """Gradients of scalar-graph ops, including accumulation and no-grad."""
        g1 = gtn.scalar_graph(3.0)

        result = gtn.negate(g1)
        gtn.backward(result)
        self.assertEqual(g1.grad().item(), -1.0)

        g1.zero_grad()

        g2 = gtn.scalar_graph(4.0)

        result = gtn.add(g1, g2)
        gtn.backward(result)
        self.assertEqual(g1.grad().item(), 1.0)
        self.assertEqual(g2.grad().item(), 1.0)

        g1.zero_grad()
        g2.zero_grad()

        result = gtn.subtract(g1, g2)
        gtn.backward(result)
        self.assertEqual(g1.grad().item(), 1.0)
        self.assertEqual(g2.grad().item(), -1.0)
        g1.zero_grad()
        g2.zero_grad()

        # g1 appears twice, so its gradient accumulates to 2.
        result = gtn.add(gtn.add(g1, g2), g1)
        gtn.backward(result)
        self.assertEqual(g1.grad().item(), 2.0)
        self.assertEqual(g2.grad().item(), 1.0)
        g1.zero_grad()

        # Second argument False disables gradient computation for this input.
        g2nograd = gtn.scalar_graph(4.0, False)

        result = gtn.add(g1, g2nograd)
        gtn.backward(result)
        self.assertEqual(g1.grad().item(), 1.0)
        self.assertRaises(RuntimeError, g2nograd.grad)
예제 #18
0
    def test_compose_grad(self):
        """Gradients of compose against hand-computed per-arc counts."""
        # 5-node chain; each of the 4 steps has arcs labeled 0, 1, 2
        # whose weight equals the label.
        first = gtn.Graph()
        first.add_node(True)
        for _ in range(3):
            first.add_node()
        first.add_node(False, True)
        for step in range(4):
            for label in range(3):
                first.add_arc(step, step + 1, label, label, label)

        # Acceptor matching one or more 0s followed by one or more 1s.
        second = gtn.Graph()
        second.add_node(True)
        second.add_node()
        second.add_node(False, True)
        second.add_arc(0, 1, 0, 0, 3.5)
        second.add_arc(1, 1, 0, 0, 2.5)
        second.add_arc(1, 2, 1, 1, 1.5)
        second.add_arc(2, 2, 1, 1, 4.5)

        gtn.backward(gtn.compose(first, second))

        # Expected gradient per arc, in insertion order: how many times
        # each arc appears across the composed graph's arcs.
        expected_first = [1, 0, 0, 1, 1, 0, 1, 2, 0, 0, 2, 0]
        expected_second = [1, 2, 3, 2]
        self.assertEqual(expected_first, first.grad().weights_to_list())
        self.assertEqual(expected_second, second.grad().weights_to_list())
예제 #19
0
        def test_concat_grad(self):
            """Gradients flow through concat; no-grad inputs get no grad.

            Fixes from review: ``gtn.Graph()(False)`` called a Graph
            instance (a TypeError at runtime) instead of constructing a
            no-grad graph with ``gtn.Graph(False)``; the first backward
            used a bare ``concat`` where the rest of the test uses
            ``gtn.concat``; and the C++-style ``CHECK_THROWS(g2.grad())``
            (which would evaluate eagerly) is replaced with the
            ``assertRaises`` idiom used by the sibling tests.
            """
            g1 = gtn.Graph()
            g1.add_node(True)
            g1.add_node()
            g1.add_node(False, True)
            g1.add_arc(0, 1, 0)
            g1.add_arc(1, 2, 1)

            # Works with a no gradient graph
            g2 = gtn.Graph(False)
            g2.add_node(True)
            g2.add_node()
            g2.add_node(False, True)
            g2.add_arc(0, 1, 0)
            g2.add_arc(1, 2, 1)

            g3 = gtn.Graph()
            g3.add_node(True)
            g3.add_node()
            g3.add_node(False, True)
            g3.add_arc(0, 1, 0)
            g3.add_arc(1, 2, 1)

            gtn.backward(gtn.forward_score(gtn.concat([g1, g2, g3])))

            def forward_fn1(g, g2=g2, g3=g3):
                return gtn.forward_score(gtn.concat([g, g2, g3]))

            self.assertTrue(numerical_grad_check(forward_fn1, g1, 1e-4, 1e-3))

            def forward_fn2(g, g1=g1, g2=g2):
                return gtn.forward_score(gtn.concat([g1, g2, g]))

            self.assertTrue(numerical_grad_check(forward_fn2, g1, 1e-4, 1e-3))

            # The no-grad graph must not have accumulated a gradient.
            self.assertRaises(RuntimeError, g2.grad)
예제 #20
0
    def test_parallel_backward(self):
        """Parallel (list-based) backward matches per-graph backward."""
        inputs1 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
        inputs2 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]

        outputs = gtn.add(inputs1, inputs2)
        gtn.backward(outputs)
        # Test gradients
        inputs1 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
        inputs2 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
        outputs = gtn.add(inputs1, inputs2)
        gradIn = gtn.scalar_graph(5.0)
        # NOTE(review): presumably the single-element lists broadcast the
        # input gradient and retain flag over all outputs — confirm against
        # the gtn parallel backward API.
        gtn.backward(outputs, [gradIn], [False])

        # Reference: sequential backward with the same 5.0 seed gradient.
        inputs1Dup = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
        inputs2Dup = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
        expected = []
        for g1, g2 in zip(inputs1Dup, inputs2Dup):
            expected.append(gtn.add(g1, g2))
        for g in expected:
            gtn.backward(g, gtn.scalar_graph(5.0))

        for i in range(0, len(expected)):
            self.assertTrue(gtn.equal(inputs1[i].grad(), inputs1Dup[i].grad()))
            self.assertTrue(gtn.equal(inputs2[i].grad(), inputs2Dup[i].grad()))
예제 #21
0
 def seq_grad(b):
     """Backpropagate through the b-th loss graph (uses closure var losses)."""
     gtn.backward(losses[b])
예제 #22
0
 def backward_single(b):
     """Backprop the b-th loss and copy its emission grads to input_grad.

     Uses enclosing-scope variables: losses, emissions_graphs, input_grad,
     T, C.
     """
     gtn.backward(losses[b])
     emissions = emissions_graphs[b]
     grad = emissions.grad().weights_to_numpy()
     input_grad[b] = torch.from_numpy(grad).view(1, T, C)
예제 #23
0
파일: parallel.py 프로젝트: codeaudit/gtn-2
 def bwd():
     # NOTE(review): `[True]` is presumably the per-graph retain-graph flag
     # for the list-based backward API — confirm against gtn.backward.
     gtn.backward(out, [True])
예제 #24
0
def main():
    """Train a small CRF with SGD on sampled data and report test accuracy."""
    num_features = 3  # number of input features
    num_classes = 2   # number of output classes
    num_train = 1000  # size of the training set
    num_test = 200    # size of the testing set

    # Setup ground-truth model:
    gt_potentials, gt_transitions = gen_model(num_features, num_classes)

    # Sample training and test datasets:
    samples = sample_model(
        num_features, num_classes,
        gt_potentials, gt_transitions,
        num_train + num_test)
    train, test = samples[:num_train], samples[num_train:]
    print(f"Using {len(train)} samples for the training set")
    print(f"Using {len(test)} samples for the test set")

    # Make the graphs for learning:
    potentials, transitions = gen_model(
        num_features, num_classes, calc_grad=True, init=False)
    print("Unary potential graph has {} nodes and {} arcs".format(
        potentials.num_nodes(), potentials.num_arcs()))
    print("Transition graph has {} nodes and {} arcs".format(
        transitions.num_nodes(), transitions.num_arcs()))

    # Make the graphs to be learned:
    # NOTE(review): this repeats the identical gen_model call above and
    # overwrites potentials/transitions — the first pair is only used for
    # the size printout. Looks like a copy artifact; confirm and de-dup.
    potentials, transitions = gen_model(
        num_features, num_classes, calc_grad=True, init=False)

    # Run the SGD loop:
    learning_rate = 1e-2
    max_iter = 10000
    losses = []
    for it, (X, Y) in enumerate(sampler(train)):
        # Compute the loss and take a gradient step:
        loss = crf_loss(X, Y, potentials, transitions)
        gtn.backward(loss)
        # Negative rate: step against the gradient.
        update_params(-learning_rate, potentials, transitions)

        # Clear the gradients:
        transitions.zero_grad()
        potentials.zero_grad()

        losses.append(loss.item())
        if (it + 1) % 1000 == 0:
            print("=" * 50)
            print(f"Iteration {it + 1}, Avg. Loss {np.mean(losses):.3f}")
            losses = []
        # NOTE(review): `it == max_iter` breaks after max_iter + 1
        # iterations (it is 0-based) — off by one if exactly max_iter
        # steps were intended.
        if it == max_iter:
            break

    # Evaluate on the test set:
    correct = 0.0
    total = 0
    for X, Y in test:
        full_graph = gtn.compose(gtn.compose(X, potentials), transitions)
        prediction = gtn.viterbi_path(full_graph).labels_to_list(False)
        correct += np.sum(np.array(Y.labels_to_list()) == prediction)
        total += len(prediction)
    print("Test: Accuracy {:.3f}".format(correct / total))
예제 #25
0
    def test_forward_score_grad(self):
        """Gradients of forward_score on a variety of graph topologies.

        Fix from review: in the +infinity case, the second check read
        ``grad.weights_to_list()`` — the *previous* graph's gradient —
        instead of ``grad2``, so g2's gradients were never inspected.
        """
        g = gtn.Graph()
        g.add_node(True)
        g.add_node()
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, 1)
        g.add_arc(0, 1, 1, 1, 2)
        g.add_arc(0, 1, 2, 2, 3)
        g.add_arc(1, 2, 0, 0, 1)
        g.add_arc(1, 2, 1, 1, 2)
        g.add_arc(1, 2, 2, 2, 3)
        gtn.backward(gtn.forward_score(g))
        self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))

        # Handle two start nodes
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, -5)
        g.add_arc(0, 2, 0, 0, 1)
        g.add_arc(1, 2, 0, 0, 2)
        gtn.backward(gtn.forward_score(g))
        self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))

        # Analytic gradients: softmax over the three path scores.
        denom = 1 / (math.exp(-3) + math.exp(1) + math.exp(2))
        grad = g.grad()
        grad_weights = grad.weights_to_list()
        self.assertAlmostEqual(grad_weights[0], (denom * math.exp(-3)))
        self.assertAlmostEqual(grad_weights[1], (denom * math.exp(1)))
        self.assertAlmostEqual(grad_weights[2],
                               (denom * (math.exp(-3) + math.exp(2))))

        # Handle two accept nodes
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(False, True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, 2)
        g.add_arc(0, 2, 0, 0, 2)
        g.add_arc(1, 2, 0, 0, 2)
        gtn.backward(gtn.forward_score(g))
        self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))

        denom = 1 / (2 * math.exp(2) + math.exp(4))
        grad = g.grad()
        grad_weights = grad.weights_to_list()
        self.assertAlmostEqual(grad_weights[0],
                               (denom * (math.exp(2) + math.exp(4))),
                               places=5)
        self.assertAlmostEqual(grad_weights[1], (denom * math.exp(2)),
                               places=5)
        self.assertAlmostEqual(grad_weights[2], (denom * math.exp(4)),
                               places=5)

        # Handle case where some arcs don't lead to accepting states
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(False, False)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, 2)
        g.add_arc(0, 2, 0, 0, 2)
        gtn.backward(gtn.forward_score(g))
        self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))
        grad = g.grad()
        grad_weights = grad.weights_to_list()
        # Arc into the dead-end node contributes nothing.
        self.assertAlmostEqual(grad_weights[0], (0.0))
        self.assertAlmostEqual(grad_weights[1], (1.0))

        # Handles negative infinity
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, -math.inf)
        g.add_arc(0, 1, 1, 1, -math.inf)
        gtn.backward(gtn.forward_score(g))

        # All -inf: the score is -inf and the gradients are NaN.
        grad = g.grad()
        grad_weights = grad.weights_to_list()
        self.assertTrue(math.isnan(grad_weights[0]))
        self.assertTrue(math.isnan(grad_weights[1]))

        g2 = gtn.Graph()
        g2.add_node(True)
        g2.add_node(False, True)
        g2.add_arc(0, 1, 0, 0, -math.inf)
        g2.add_arc(0, 1, 1, 1, 1.0)
        gtn.backward(gtn.forward_score(g2))

        # Mixed -inf/finite: the -inf arc gets zero gradient.
        grad2 = g2.grad()
        grad_weights = grad2.weights_to_list()
        self.assertAlmostEqual(grad_weights[0], (0.0))
        self.assertAlmostEqual(grad_weights[1], (1.0))

        # Handles infinity
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, math.inf)
        g.add_arc(0, 1, 1, 1, math.inf)
        gtn.backward(gtn.forward_score(g))
        grad = g.grad()
        grad_weights = grad.weights_to_list()
        self.assertTrue(math.isnan(grad_weights[0]))
        self.assertTrue(math.isnan(grad_weights[1]))

        g2 = gtn.Graph()
        g2.add_node(True)
        g2.add_node(False, True)
        g2.add_arc(0, 1, 0, 0, math.inf)
        g2.add_arc(0, 1, 1, 1, 1.0)
        gtn.backward(gtn.forward_score(g2))
        grad2 = g2.grad()
        # Fixed: inspect grad2 (g2's gradient), not the stale `grad`.
        grad_weights = grad2.weights_to_list()
        self.assertTrue(math.isnan(grad_weights[0]))
        self.assertTrue(math.isnan(grad_weights[1]))

        # A more complex test case
        g_str = [
            "0 1",
            "3 4",
            "0 1 0 0 2",
            "0 2 1 1 1",
            "1 2 0 0 2",
            "2 3 0 0 1",
            "2 3 1 1 1",
            "1 4 0 0 2",
            "2 4 1 1 3",
            "3 4 0 0 2",
        ]
        g = create_graph_from_text(g_str)
        gtn.backward(gtn.forward_score(g))
        self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))
예제 #26
0
    def test_sample_grad(self):
        """Sampled-path gradients count each original arc's occurrences."""
        g = gtn.Graph()
        g.add_node(True)
        g.add_node()
        g.add_node(False, True)
        g.add_arc(0, 0, 0)
        g.add_arc(0, 1, 1)
        g.add_arc(1, 0, 2)
        g.add_arc(1, 2, 3)

        for _ in range(5):
            g.zero_grad()
            path = gtn.sample(g)
            # Expected grad: how often each of the four arcs was traversed.
            expected = [0.0, 0.0, 0.0, 0.0]
            labels = path.labels_to_list()
            for a in range(path.num_arcs()):
                expected[labels[a]] += 1

            gtn.backward(path)
            self.assertTrue(expected == g.grad().weights_to_list())

        def test_sum_grad(self):
            """Gradients flow through union; no-grad inputs get no grad.

            Fixes from review: ``gtn.Graph()(False)`` called a Graph
            instance (a TypeError at runtime) instead of constructing a
            no-grad graph with ``gtn.Graph(False)``, and the C++-style
            ``CHECK_THROWS(g2.grad())`` (which would evaluate eagerly) is
            replaced with the ``assertRaises`` idiom used by the sibling
            tests.
            """
            g1 = gtn.Graph()
            g1.add_node(True)
            g1.add_node()
            g1.add_node(False, True)
            g1.add_arc(0, 1, 0)
            g1.add_arc(1, 2, 1)

            # Works with a no gradient graph
            g2 = gtn.Graph(False)
            g2.add_node(True)
            g2.add_node()
            g2.add_node(False, True)
            g2.add_arc(0, 1, 0)
            g2.add_arc(1, 2, 1)

            g3 = gtn.Graph()
            g3.add_node(True)
            g3.add_node()
            g3.add_node(False, True)
            g3.add_arc(0, 1, 0)
            g3.add_arc(1, 2, 1)

            gtn.backward(gtn.forward_score(gtn.union([g1, g2, g3])))

            def forward_fn1(g, g2=g2, g3=g3):
                return gtn.forward_score(gtn.union([g, g2, g3]))

            self.assertTrue(numerical_grad_check(forward_fn1, g1, 1e-4, 1e-3))

            def forward_fn2(g, g1=g1, g2=g2):
                return gtn.forward_score(gtn.union([g1, g2, g]))

            self.assertTrue(numerical_grad_check(forward_fn2, g3, 1e-4, 1e-3))

            # The no-grad graph must not have accumulated a gradient.
            self.assertRaises(RuntimeError, g2.grad)

        def test_concat_grad(self):
            """Gradients flow through concat; no-grad inputs get no grad.

            Fixes from review (same as the earlier copy of this test):
            ``gtn.Graph()(False)`` called a Graph instance instead of
            constructing a no-grad graph with ``gtn.Graph(False)``; the
            first backward used a bare ``concat`` where the rest of the
            test uses ``gtn.concat``; and the C++-style
            ``CHECK_THROWS(g2.grad())`` is replaced with ``assertRaises``.
            """
            g1 = gtn.Graph()
            g1.add_node(True)
            g1.add_node()
            g1.add_node(False, True)
            g1.add_arc(0, 1, 0)
            g1.add_arc(1, 2, 1)

            # Works with a no gradient graph
            g2 = gtn.Graph(False)
            g2.add_node(True)
            g2.add_node()
            g2.add_node(False, True)
            g2.add_arc(0, 1, 0)
            g2.add_arc(1, 2, 1)

            g3 = gtn.Graph()
            g3.add_node(True)
            g3.add_node()
            g3.add_node(False, True)
            g3.add_arc(0, 1, 0)
            g3.add_arc(1, 2, 1)

            gtn.backward(gtn.forward_score(gtn.concat([g1, g2, g3])))

            def forward_fn1(g, g2=g2, g3=g3):
                return gtn.forward_score(gtn.concat([g, g2, g3]))

            self.assertTrue(numerical_grad_check(forward_fn1, g1, 1e-4, 1e-3))

            def forward_fn2(g, g1=g1, g2=g2):
                return gtn.forward_score(gtn.concat([g1, g2, g]))

            self.assertTrue(numerical_grad_check(forward_fn2, g1, 1e-4, 1e-3))

            # The no-grad graph must not have accumulated a gradient.
            self.assertRaises(RuntimeError, g2.grad)

        def test_closure_grad(self):
            """Numerical gradient check through closure composed with a chain."""
            g1 = gtn.Graph()
            g1.add_node(True)
            g1.add_node(False, True)
            g1.add_arc(0, 1, 0, 0, 1.3)
            g1.add_arc(1, 1, 1, 1, 2.1)

            # A 4-step linear chain accepting any label sequence over {0, 1}.
            g2 = gtn.Graph()
            g2.add_node(True)
            g2.add_node()
            g2.add_node()
            g2.add_node()
            g2.add_node(False, True)
            g2.add_arc(0, 1, 0)
            g2.add_arc(0, 1, 1)
            g2.add_arc(1, 2, 0)
            g2.add_arc(1, 2, 1)
            g2.add_arc(2, 3, 0)
            g2.add_arc(2, 3, 1)
            g2.add_arc(3, 4, 0)
            g2.add_arc(3, 4, 1)

            # NOTE(review): bare `closure` — presumably imported from gtn
            # (e.g. `from gtn import closure`); confirm the import.
            gtn.backward(gtn.forward_score(gtn.compose(closure(g1), g2)))

            def forward_fn(g, g2=g2):
                return gtn.forward_score(gtn.compose(closure(g), g2))

            self.assertTrue(numerical_grad_check(forward_fn, g1, 1e-3, 1e-3))
예제 #27
0
    def test_ctc_criterion(self):
        """CTC loss and gradients via graph composition on reference cases."""
        # These test cases are taken from wav2letter: https://fburl.com/msom2e4v

        # Test case 1
        ctc = ctc_graph([0, 0], 1)

        emissions = emissions_graph([1.0, 0.0, 0.0, 1.0, 1.0, 0.0], 3, 2)

        loss = gtn.forward_score(gtn.compose(ctc, emissions))
        self.assertEqual(loss.item(), 0.0)

        # Should be 0 since scores are normalized
        z = gtn.forward_score(emissions)
        self.assertEqual(z.item(), 0.0)

        # Test case 2
        T = 3
        N = 4
        ctc = ctc_graph([1, 2], N - 1)
        emissions = emissions_graph([1.0] * (T * N), T, N)

        # 5 valid alignments, each with probability 0.25^3 under uniform scores.
        expected_loss = -math.log(0.25 * 0.25 * 0.25 * 5)

        loss = gtn.subtract(gtn.forward_score(gtn.compose(ctc, emissions)),
                            gtn.forward_score(emissions))
        self.assertAlmostEqual(-loss.item(), expected_loss)

        # Test case 3
        T = 5
        N = 6
        target = [0, 1, 2, 1, 0]

        # generate CTC graph
        ctc = ctc_graph(target, N - 1)

        # fmt: off
        emissions_vec = [
            0.633766,
            0.221185,
            0.0917319,
            0.0129757,
            0.0142857,
            0.0260553,
            0.111121,
            0.588392,
            0.278779,
            0.0055756,
            0.00569609,
            0.010436,
            0.0357786,
            0.633813,
            0.321418,
            0.00249248,
            0.00272882,
            0.0037688,
            0.0663296,
            0.643849,
            0.280111,
            0.00283995,
            0.0035545,
            0.00331533,
            0.458235,
            0.396634,
            0.123377,
            0.00648837,
            0.00903441,
            0.00623107,
        ]
        # fmt: on

        emissions = emissions_graph(emissions_vec, T, N)

        # The log probabilities are already normalized,
        # so this should be close to 0
        z = gtn.forward_score(emissions)
        self.assertTrue(abs(z.item()) < 1e-5)

        loss = gtn.subtract(z, gtn.forward_score(gtn.compose(ctc, emissions)))
        expected_loss = 3.34211
        self.assertAlmostEqual(loss.item(), expected_loss, places=5)

        # Check the gradients
        gtn.backward(loss)

        # fmt: off
        expected_grad = [
            -0.366234, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
            0.111121, -0.411608, 0.278779, 0.0055756, 0.00569609, 0.010436,
            0.0357786, 0.633813, -0.678582, 0.00249248, 0.00272882, 0.0037688,
            0.0663296, -0.356151, 0.280111, 0.00283995, 0.0035545, 0.00331533,
            -0.541765, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107
        ]
        # fmt: on
        # Compare element-wise within 1e-5 (floats, so no exact equality).
        all_close = True
        grad = emissions.grad()
        grad_weights = grad.weights_to_list()
        for i in range(T * N):
            g = grad_weights[i]
            all_close = all_close and (abs(expected_grad[i] - g) < 1e-5)

        self.assertTrue(all_close)

        # Test case 4
        # This test case is taken from the TensorFlow CTC implementation
        # tinyurl.com/y9du5v5a
        T = 5
        N = 6
        target = [0, 1, 1, 0]

        # generate CTC graph
        ctc = ctc_graph(target, N - 1)
        # fmt: off
        emissions_vec = [
            0.30176,
            0.28562,
            0.0831517,
            0.0862751,
            0.0816851,
            0.161508,
            0.24082,
            0.397533,
            0.0557226,
            0.0546814,
            0.0557528,
            0.19549,
            0.230246,
            0.450868,
            0.0389607,
            0.038309,
            0.0391602,
            0.202456,
            0.280884,
            0.429522,
            0.0326593,
            0.0339046,
            0.0326856,
            0.190345,
            0.423286,
            0.315517,
            0.0338439,
            0.0393744,
            0.0339315,
            0.154046,
        ]
        # fmt: on

        emissions = emissions_graph(emissions_vec, T, N)

        # The log probabilities are already normalized,
        # so this should be close to 0
        z = gtn.forward_score(emissions)
        self.assertTrue(abs(z.item()) < 1e-5)

        loss = gtn.subtract(z, gtn.forward_score(gtn.compose(ctc, emissions)))
        expected_loss = 5.42262
        self.assertAlmostEqual(loss.item(), expected_loss, places=4)

        # Check the gradients
        gtn.backward(loss)
        # fmt: off
        expected_grad = [
            -0.69824,
            0.28562,
            0.0831517,
            0.0862751,
            0.0816851,
            0.161508,
            0.24082,
            -0.602467,
            0.0557226,
            0.0546814,
            0.0557528,
            0.19549,
            0.230246,
            0.450868,
            0.0389607,
            0.038309,
            0.0391602,
            -0.797544,
            0.280884,
            -0.570478,
            0.0326593,
            0.0339046,
            0.0326856,
            0.190345,
            -0.576714,
            0.315517,
            0.0338439,
            0.0393744,
            0.0339315,
            0.154046,
        ]
        # fmt: on

        # Same element-wise tolerance check as test case 3.
        all_close = True
        grad = emissions.grad()
        grad_weights = grad.weights_to_list()
        for i in range(T * N):
            g = grad_weights[i]
            all_close = all_close and (abs(expected_grad[i] - g) < 1e-5)
        self.assertTrue(all_close)
# Example #28
    def test_asg_criterion(self):
        """Check ASG criterion losses and gradients for a 3-sample batch.

        Reference values are taken from wav2letter: https://fburl.com/msom2e4v

        For each sample, the ASG loss is the (log) normalizer over all
        label sequences minus the score of the target-constrained
        (force-aligned) sequences. Gradients are checked element-wise for
        the per-sample emissions and, after all samples, for the shared
        transition graph (whose gradients accumulate across samples).
        """
        T = 5  # number of time frames per sample
        N = 6  # number of labels

        # fmt: off
        targets = [
            [2, 1, 5, 1, 3],
            [4, 3, 5],
            [3, 2, 2, 1],
        ]

        expected_loss = [
            7.7417464256287,
            6.4200420379639,
            8.2780694961548,
        ]

        # Per-sample emissions, T rows of N scores each.
        emissions_vecs = [
            [
                -0.4340, -0.0254, 0.3667, 0.4180, -0.3805, -0.1707,
                0.1060, 0.3631, -0.1122, -0.3825, -0.0031, -0.3801,
                0.0443, -0.3795, 0.3194, -0.3130, 0.0094, 0.1560,
                0.1252, 0.2877, 0.1997, -0.4554, 0.2774, -0.2526,
                -0.4001, -0.2402, 0.1295, 0.0172, 0.1805, -0.3299,
            ],
            [
                0.3298, -0.2259, -0.0959, 0.4909, 0.2996, -0.2543,
                -0.2863, 0.3239, -0.3988, 0.0732, -0.2107, -0.4739,
                -0.0906, 0.0480, -0.1301, 0.3975, -0.3317, -0.1967,
                0.4372, -0.2006, 0.0094, 0.3281, 0.1873, -0.2945,
                0.2399, 0.0320, -0.3768, -0.2849, -0.2248, 0.3186,
            ],
            [
                0.0225, -0.3867, -0.1929, -0.2904, -0.4958, -0.2533,
                0.4001, -0.1517, -0.2799, -0.2915, 0.4198, 0.4506,
                0.1446, -0.4753, -0.0711, 0.2876, -0.1851, -0.1066,
                0.2081, -0.1190, -0.3902, -0.1668, 0.1911, -0.2848,
                -0.3846, 0.1175, 0.1052, 0.2172, -0.0362, 0.3055,
            ],
        ]

        # Expected per-sample emission gradients, same T x N layout.
        emissions_grads = [
            [
                0.1060, 0.1595, -0.7639, 0.2485, 0.1118, 0.1380,
                0.1915, -0.7524, 0.1539, 0.1175, 0.1717, 0.1178,
                0.1738, 0.1137, 0.2288, 0.1216, 0.1678, -0.8057,
                0.1766, -0.7923, 0.1902, 0.0988, 0.2056, 0.1210,
                0.1212, 0.1422, 0.2059, -0.8160, 0.2166, 0.1300,
            ],
            [
                0.2029, 0.1164, 0.1325, 0.2383, -0.8032, 0.1131,
                0.1414, 0.2602, 0.1263, -0.3441, -0.3009, 0.1172,
                0.1557, 0.1788, 0.1496, -0.5498, 0.0140, 0.0516,
                0.2306, 0.1219, 0.1503, -0.4244, 0.1796, -0.2579,
                0.2149, 0.1745, 0.1160, 0.1271, 0.1350, -0.7675,
            ],
            [
                0.2195, 0.1458, 0.1770, -0.8395, 0.1307, 0.1666,
                0.2148, 0.1237, -0.6613, -0.1223, 0.2191, 0.2259,
                0.2002, 0.1077, -0.8386, 0.2310, 0.1440, 0.1557,
                0.2197, -0.1466, -0.5742, 0.1510, 0.2160, 0.1342,
                0.1050, -0.8265, 0.1714, 0.1917, 0.1488, 0.2094,
            ],
        ]
        # fmt: on

        # Transition graph shared by all samples: node 0 is the start
        # state, nodes 1..N are one accepting state per label. The first
        # N arcs are the start transitions, followed by all N*N bigrams.
        transitions = gtn.Graph()
        transitions.add_node(True)
        for i in range(1, N + 1):
            transitions.add_node(False, True)
            transitions.add_arc(0, i, i - 1)  # p(i | <s>)

        for i in range(N):
            for j in range(N):
                transitions.add_arc(j + 1, i + 1, i)  # p(i | j)

        for target, emissions_vec, emissions_grad, exp_loss in zip(
            targets, emissions_vecs, emissions_grads, expected_loss
        ):
            # Force-alignment graph: a linear chain through the target
            # labels where every state also has a self-loop on its label,
            # so each label may span one or more frames.
            fal = gtn.Graph()
            fal.add_node(True)
            for step in range(1, len(target) + 1):
                fal.add_node(False, step == len(target))
                fal.add_arc(step - 1, step, target[step - 1])
                fal.add_arc(step, step, target[step - 1])  # self-loop

            emissions = emissions_graph(emissions_vec, T, N, True)

            # ASG loss = log-normalizer over all paths minus the score of
            # the target-constrained paths.
            loss = gtn.subtract(
                gtn.forward_score(gtn.compose(emissions, transitions)),
                gtn.forward_score(
                    gtn.compose(gtn.compose(fal, transitions), emissions)),
            )
            self.assertAlmostEqual(loss.item(), exp_loss, places=3)

            # Check the emission gradients element-wise so a failure
            # reports the offending pair of values instead of collapsing
            # everything into a single boolean.
            gtn.backward(loss)
            grad_weights = emissions.grad().weights_to_list()
            for expected, actual in zip(emissions_grad, grad_weights):
                self.assertLess(abs(expected - actual), 1e-4)

        # Expected transition gradients for the N*N bigram arcs,
        # accumulated over all three samples (no zero_grad in between).
        # fmt: off
        trans_grad = [
            0.3990, 0.3396, 0.3486, 0.3922, 0.3504, 0.3155,
            0.3666, 0.0116, -1.6678, 0.3737, 0.3361, -0.7152,
            0.3468, 0.3163, -1.1583, -0.6803, 0.3216, 0.2722,
            0.3694, -0.6688, 0.3047, -0.8531, -0.6571, 0.2870,
            0.3866, 0.3321, 0.3447, 0.3664, -0.2163, 0.3039,
            0.3640, -0.6943, 0.2988, -0.6722, 0.3215, -0.1860,
        ]
        # fmt: on

        # Skip the first N weights: those belong to the start arcs
        # p(i | <s>), which the reference gradients do not cover.
        grad_weights = transitions.grad().weights_to_list()
        for expected, actual in zip(trans_grad, grad_weights[N:N + N * N]):
            self.assertLess(abs(expected - actual), 1e-4)