def test_viterbi_path_grad(self):
    """Check gradients of viterbi_path, alone and used repeatedly under a union."""
    # Graph text format: start nodes, accept nodes, then "src dst ilabel olabel weight".
    g_str = [
        "0 1",
        "3 4",
        "0 1 0 0 2",
        "0 2 1 1 1",
        "1 2 0 0 2",
        "2 3 0 0 1",
        "2 3 1 1 3",
        "1 4 0 0 2",
        "2 4 1 1 3",
        "3 4 0 0 2",
    ]
    g = create_graph_from_text(g_str)
    gtn.backward(gtn.viterbi_path(g))
    # Arcs on the best-scoring path get gradient 1, all others 0.
    expected = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0]
    self.assertEqual(g.grad().weights_to_list(), expected)
    g.zero_grad()

    def forward_fn(g):
        # The same input is used three times; gradients must accumulate consistently.
        paths = [gtn.viterbi_path(g), gtn.viterbi_path(g), gtn.viterbi_path(g)]
        return gtn.forward_score(gtn.union(paths))

    gtn.backward(forward_fn(g))
    self.assertTrue(numerical_grad_check(forward_fn, g, 1e-3, 1e-5))
def test_closure_grad(self):
    """Numerically check gradients through closure(g1) composed with a chain graph."""
    g1 = gtn.Graph()
    g1.add_node(True)
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0, 0, 1.3)
    g1.add_arc(1, 1, 1, 1, 2.1)
    # Four-step linear acceptor over labels {0, 1}.
    g2 = gtn.Graph()
    g2.add_node(True)
    g2.add_node()
    g2.add_node()
    g2.add_node()
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0)
    g2.add_arc(0, 1, 1)
    g2.add_arc(1, 2, 0)
    g2.add_arc(1, 2, 1)
    g2.add_arc(2, 3, 0)
    g2.add_arc(2, 3, 1)
    g2.add_arc(3, 4, 0)
    g2.add_arc(3, 4, 1)
    gtn.backward(gtn.forward_score(gtn.compose(closure(g1), g2)))

    def forward_fn(g, g2=g2):
        return gtn.forward_score(gtn.compose(closure(g), g2))

    self.assertTrue(numerical_grad_check(forward_fn, g1, 1e-3, 1e-3))
def process(b):
    # Backpropagate sample b's loss, scaled by its per-sample weight.
    scale = make_scalar_graph(scales[b])
    gtn.backward(losses[b], scale)
    emissions = emissions_graphs[b]
    if calc_emissions:
        # Copy the emission-arc gradients back into the torch input gradient.
        # Assumes the emission graph has T * C arc weights — TODO confirm.
        grad = emissions.grad().weights_to_numpy()
        input_grad[b] = torch.tensor(grad).view(1, T, C)
def process(b):
    # Backpropagate sample b's loss (retain_graph=False) and scatter the
    # emission gradients into the first T frames of the padded input gradient.
    T = ilens[b]  # actual (unpadded) length of sample b
    gtn.backward(losses[b], False)
    emissions = emissions_graphs[b]
    grad = emissions.grad().weights_to_numpy()
    input_grad[b][:T] = torch.from_numpy(grad).view(1, T, C) * scales[b]
def test_simple_decomposition(self):
    """Compare the Transducer loss/grad against a hand-built alignment graph.

    The target 'aba' with tokens {a, b, ab, ba, aba} admits four
    decompositions; the alignment graph below encodes all of them.
    """
    T = 5
    tokens = ["a", "b", "ab", "ba", "aba"]
    scores = torch.randn((1, T, len(tokens)), requires_grad=True)
    labels = [[0, 1, 0]]
    transducer = Transducer(tokens=tokens, graphemes_to_idx={"a": 0, "b": 1})

    # Hand construct the alignment graph with all of the decompositions
    alignments = gtn.Graph(False)
    alignments.add_node(True)

    # Add the path ['a', 'b', 'a']
    alignments.add_node()
    alignments.add_arc(0, 1, 0)
    alignments.add_arc(1, 1, 0)  # self-loop: token may span multiple frames
    alignments.add_node()
    alignments.add_arc(1, 2, 1)
    alignments.add_arc(2, 2, 1)
    alignments.add_node(False, True)
    alignments.add_arc(2, 3, 0)
    alignments.add_arc(3, 3, 0)

    # Add the path ['a', 'ba']
    alignments.add_node(False, True)
    alignments.add_arc(1, 4, 3)
    alignments.add_arc(4, 4, 3)

    # Add the path ['ab', 'a']
    alignments.add_node()
    alignments.add_arc(0, 5, 2)
    alignments.add_arc(5, 5, 2)
    alignments.add_arc(5, 3, 0)

    # Add the path ['aba']
    alignments.add_node(False, True)
    alignments.add_arc(0, 6, 4)
    alignments.add_arc(6, 6, 4)

    emissions = gtn.linear_graph(T, len(tokens), True)
    emissions.set_weights(scores.data_ptr())

    # Loss = log Z(emissions) - log-score of all valid alignments.
    expected_loss = gtn.subtract(
        gtn.forward_score(emissions),
        gtn.forward_score(gtn.intersect(emissions, alignments)),
    )
    loss = transducer(scores, labels)
    self.assertAlmostEqual(loss.item(), expected_loss.item(), places=5)

    loss.backward()
    gtn.backward(expected_loss)
    expected_grad = torch.tensor(emissions.grad().weights_to_numpy())
    expected_grad = expected_grad.view((1, T, len(tokens)))
    self.assertTrue(
        torch.allclose(scores.grad, expected_grad, rtol=1e-4, atol=1e-5))
def process(b):
    # Backpropagate every windowed output graph of sample b and accumulate the
    # resulting input-graph gradients into the (overlapping) strided windows.
    for t, window in enumerate(output_graphs[b]):
        for c, out in enumerate(window):
            delta = make_scalar_graph(deltas[b, t, c])
            gtn.backward(out, delta)
        # Each input window contributes a (kernel_size, -1) gradient slab.
        grad = (input_graphs[b][t].grad().weights_to_numpy().reshape(
            kernel_size, -1))
        # += because adjacent windows overlap when stride < kernel_size.
        input_grad[b, t * stride:t * stride + kernel_size] += grad
def test_backward_calls_once(self):
    """Gradients match whether backward is called on a list or a single graph."""
    g1 = gtn.scalar_graph(1)
    g2 = gtn.scalar_graph(1)
    gout = gtn.add(g1, g2)
    gtn.backward([gout])  # list form (parallel API)
    pmap_grad = gout.grad()
    gout = gtn.add(g1, g2)
    gtn.backward(gout)  # single-graph form
    grad = gout.grad()
    self.assertTrue(gtn.equal(pmap_grad, grad))
def process(b):
    # Backpropagate sample b's loss and copy emission/transition gradients
    # (scaled per sample) into the torch gradient buffers when requested.
    gtn.backward(losses[b], False)
    emissions = emissions_graphs[b]
    transitions = transitions_graphs[b]
    if input_grad is not None:
        grad = emissions.grad().weights_to_numpy()
        input_grad[b] = torch.from_numpy(grad).view(1, T, C) * scales[b]
    if transitions_grad is not None:
        # C + 1 rows: transitions from each class plus the start state — TODO confirm.
        grad = transitions.grad().weights_to_numpy()
        transitions_grad[b] = (
            torch.from_numpy(grad).view(1, C + 1, C) * scales[b])
def test_autograd(self):
    """Exercise retain-graph semantics and user-supplied input gradients."""
    # The graph is not retained by default
    g1 = gtn.scalar_graph(3.0)
    g2 = gtn.scalar_graph(3.0)
    result = gtn.add(g1, g2)
    gtn.backward(result)
    # Cannot backward twice when graph is cleared.
    self.assertRaises(ValueError, gtn.backward, result)

    # Check the graph is retained
    g1.zero_grad()
    g2.zero_grad()
    result = gtn.add(g1, g2)
    gtn.backward(result, True)  # retain_graph=True
    result.zero_grad()
    g1.zero_grad()
    g2.zero_grad()
    gtn.backward(result, True)  # second backward succeeds after retain
    self.assertEqual(g1.grad().item(), 1.0)
    self.assertEqual(g2.grad().item(), 1.0)

    # Check that provided input gradients are used.
    g1.zero_grad()
    g2.zero_grad()
    result = gtn.add(g1, g2)
    deltas = gtn.Graph()
    deltas.add_node(True)
    deltas.add_node(False, True)
    deltas.add_arc(0, 1, 0, 0, 7.0)  # seed gradient of 7 instead of 1
    gtn.backward(result, deltas)
    self.assertEqual(g1.grad().item(), 7.0)
    self.assertEqual(g2.grad().item(), 7.0)
def test_grad_available(self):
    """is_grad_available flips from False to True after a backward pass."""
    g = gtn.Graph()
    g.add_node(True)
    g.add_node()
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 1)
    g.add_arc(0, 1, 1, 1, 2)
    g.add_arc(0, 1, 2, 2, 3)
    g.add_arc(1, 2, 0, 0, 1)
    g.add_arc(1, 2, 1, 1, 2)
    g.add_arc(1, 2, 2, 2, 3)
    self.assertFalse(g.is_grad_available())
    gtn.backward(gtn.forward_score(g))
    self.assertTrue(g.is_grad_available())
def test_retain_graph(self):
    """Backward twice fails unless retain_graph=True is passed."""
    # The graph is not retained by default
    g1 = gtn.Graph(True)
    g1.add_node(True)
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0, 0, 3.0)
    g2 = gtn.Graph(True)
    g2.add_node(True)
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0, 0, 3.0)
    result = gtn.add(g1, g2)
    gtn.backward(result)
    with self.assertRaises(ValueError):
        gtn.backward(result)

    # Check the graph is retained
    g1.zero_grad()
    g2.zero_grad()
    result = gtn.add(g1, g2)
    gtn.backward(result, True)  # retain_graph=True
    g1.zero_grad()
    g2.zero_grad()
    result.zero_grad()
    gtn.backward(result, True)  # second backward now succeeds
    self.assertTrue(g1.grad().item() == 1.0)
    self.assertTrue(g2.grad().item() == 1.0)
def test_viterbi_score_grad(self):
    """Gradient of viterbi_score is an indicator over the best path's arcs."""
    g = gtn.Graph()
    g.add_node(True)
    g.add_node()
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 1)
    g.add_arc(0, 1, 1, 1, 2)
    g.add_arc(0, 1, 2, 2, 3)
    g.add_arc(1, 2, 0, 0, 1)
    g.add_arc(1, 2, 1, 1, 2)
    g.add_arc(1, 2, 2, 2, 3)
    gtn.backward(gtn.viterbi_score(g))
    expected = [0.0, 0.0, 1.0, 0.0, 0.0, 1.0]
    self.assertEqual(g.grad().weights_to_list(), expected)

    # Handle two start nodes
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, -5)
    g.add_arc(0, 2, 0, 0, 1)
    g.add_arc(1, 2, 0, 0, 2)
    gtn.backward(gtn.viterbi_score(g))
    # Best path starts at the second start node (weight 2).
    expected = [0.0, 0.0, 1.0]
    self.assertEqual(g.grad().weights_to_list(), expected)

    # Handle two accept nodes
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(False, True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 2)
    g.add_arc(0, 2, 0, 0, 2)
    g.add_arc(1, 2, 0, 0, 2)
    gtn.backward(gtn.viterbi_score(g))
    # Best path is the two-arc route 0 -> 1 -> 2 (score 4).
    expected = [1.0, 0.0, 1.0]
    self.assertEqual(g.grad().weights_to_list(), expected)

    # A more complex test case
    g_str = [
        "0 1",
        "3 4",
        "0 1 0 0 2",
        "0 2 1 1 1",
        "1 2 0 0 2",
        "2 3 0 0 1",
        "2 3 1 1 1",
        "1 4 0 0 2",
        "2 4 1 1 3",
        "3 4 0 0 2",
    ]
    g = create_graph_from_text(g_str)
    gtn.backward(gtn.viterbi_score(g))
    # two possible paths with same viterbi score
    expected1 = [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]
    expected2 = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0]
    self.assertTrue(g.grad().weights_to_list() == expected1
                    or g.grad().weights_to_list() == expected2)
def test_clone_project_grad(self):
    """A clone only receives gradients when it participates in the computation."""
    g1 = gtn.scalar_graph(3.0)
    g2 = gtn.scalar_graph(4.0)
    cloned = gtn.clone(g1)
    result = gtn.add(g1, g2)
    gtn.backward(result)
    # Cloned wasn't used in the computation
    self.assertRaises(RuntimeError, cloned.grad)

    # Cloned was used in the computation
    g1.zero_grad()
    g2.zero_grad()
    result = gtn.add(cloned, g2)
    gtn.backward(result)
    # Gradient flows through the clone back to the original g1.
    self.assertTrue(gtn.equal(cloned.grad(), g1.grad()))
def test_calc_grad(self):
    """The calc_grad flag overrides the constructor's gradient setting."""
    g1 = gtn.Graph(False)
    g1.calc_grad = True  # enable grads despite Graph(False)
    g1.add_node(True)
    g1.add_node(False, True)
    g1.add_arc(0, 1, 1, 1, 1.0)
    g2 = gtn.Graph(True)
    g2.calc_grad = False  # disable grads despite Graph(True)
    g2.add_node(True)
    g2.add_node(False, True)
    g2.add_arc(0, 0, 1, 1, 1.0)
    result = gtn.add(g1, g2)
    gtn.backward(result)
    self.assertTrue(g1.grad().item() == 1.0)
    with self.assertRaises(RuntimeError):
        g2.grad()
def test_input_grad(self):
    """A user-supplied delta graph seeds backward instead of the default 1."""
    # Check that provided input gradients are used.
    g1 = gtn.Graph(True)
    g1.add_node(True)
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0, 0, 3.0)
    g2 = gtn.Graph(True)
    g2.add_node(True)
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0, 0, 3.0)
    result = gtn.add(g1, g2)
    deltas = gtn.Graph()
    deltas.add_node(True)
    deltas.add_node(False, True)
    deltas.add_arc(0, 1, 0, 0, 7.0)  # seed gradient of 7
    gtn.backward(result, deltas)
    self.assertTrue(g1.grad().item() == 7.0)
    self.assertTrue(g2.grad().item() == 7.0)
def test_scalar_ops(self):
    """Forward values and gradients of negate, add, and subtract on scalars."""
    g1 = gtn.Graph()
    g1.add_node(True)
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0, 0, 1.0)

    # Test negate:
    res = gtn.negate(g1)
    self.assertEqual(res.item(), -1.0)
    gtn.backward(res)
    self.assertEqual(g1.grad().item(), -1.0)
    g1.zero_grad()

    g2 = gtn.Graph()
    g2.add_node(True)
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0, 0, 3.0)

    # Test add:
    res = gtn.add(g1, g2)
    self.assertEqual(res.item(), 4.0)
    gtn.backward(res)
    self.assertEqual(g1.grad().item(), 1.0)
    self.assertEqual(g2.grad().item(), 1.0)
    g1.zero_grad()
    g2.zero_grad()

    # Test subtract:
    res = gtn.subtract(g1, g2)
    self.assertEqual(res.item(), -2.0)
    gtn.backward(res)
    self.assertEqual(g1.grad().item(), 1.0)
    self.assertEqual(g2.grad().item(), -1.0)
def test_scalar_ops_grad(self):
    """Gradients of scalar ops, including reuse of an input and calc_grad=False."""
    g1 = gtn.scalar_graph(3.0)

    result = gtn.negate(g1)
    gtn.backward(result)
    self.assertEqual(g1.grad().item(), -1.0)
    g1.zero_grad()

    g2 = gtn.scalar_graph(4.0)

    result = gtn.add(g1, g2)
    gtn.backward(result)
    self.assertEqual(g1.grad().item(), 1.0)
    self.assertEqual(g2.grad().item(), 1.0)
    g1.zero_grad()
    g2.zero_grad()

    result = gtn.subtract(g1, g2)
    gtn.backward(result)
    self.assertEqual(g1.grad().item(), 1.0)
    self.assertEqual(g2.grad().item(), -1.0)
    g1.zero_grad()
    g2.zero_grad()

    # g1 appears twice, so its gradient accumulates to 2.
    result = gtn.add(gtn.add(g1, g2), g1)
    gtn.backward(result)
    self.assertEqual(g1.grad().item(), 2.0)
    self.assertEqual(g2.grad().item(), 1.0)
    g1.zero_grad()

    # A graph built with calc_grad=False has no gradient after backward.
    g2nograd = gtn.scalar_graph(4.0, False)
    result = gtn.add(g1, g2nograd)
    gtn.backward(result)
    self.assertEqual(g1.grad().item(), 1.0)
    self.assertRaises(RuntimeError, g2nograd.grad)
def test_compose_grad(self):
    """Arc-count gradients propagated through compose to both input graphs."""
    # 4-step lattice with labels {0, 1, 2} on every step.
    first = gtn.Graph()
    first.add_node(True)
    first.add_node()
    first.add_node()
    first.add_node()
    first.add_node(False, True)
    first.add_arc(0, 1, 0, 0, 0)
    first.add_arc(0, 1, 1, 1, 1)
    first.add_arc(0, 1, 2, 2, 2)
    first.add_arc(1, 2, 0, 0, 0)
    first.add_arc(1, 2, 1, 1, 1)
    first.add_arc(1, 2, 2, 2, 2)
    first.add_arc(2, 3, 0, 0, 0)
    first.add_arc(2, 3, 1, 1, 1)
    first.add_arc(2, 3, 2, 2, 2)
    first.add_arc(3, 4, 0, 0, 0)
    first.add_arc(3, 4, 1, 1, 1)
    first.add_arc(3, 4, 2, 2, 2)

    # Acceptor matching 0+ then 1+ with self-loops.
    second = gtn.Graph()
    second.add_node(True)
    second.add_node()
    second.add_node(False, True)
    second.add_arc(0, 1, 0, 0, 3.5)
    second.add_arc(1, 1, 0, 0, 2.5)
    second.add_arc(1, 2, 1, 1, 1.5)
    second.add_arc(2, 2, 1, 1, 4.5)

    composed = gtn.compose(first, second)
    gtn.backward(composed)
    # Each gradient entry counts how many composed paths use that arc.
    gradsFirst = [1, 0, 0, 1, 1, 0, 1, 2, 0, 0, 2, 0]
    gradsSecond = [1, 2, 3, 2]
    self.assertEqual(gradsFirst, first.grad().weights_to_list())
    self.assertEqual(gradsSecond, second.grad().weights_to_list())
def test_concat_grad(self):
    """Numerically check gradients through concat, with one no-grad input.

    g2 is built with calc_grad=False, so it must not accumulate gradients
    and g2.grad() must raise.
    """
    g1 = gtn.Graph()
    g1.add_node(True)
    g1.add_node()
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0)
    g1.add_arc(1, 2, 1)

    # Works with a no gradient graph
    # BUG FIX: the original read `gtn.Graph()(False)`, which constructs a
    # Graph and then *calls* the instance with False (a TypeError at runtime).
    # The intent — matching the comment and the g2.grad() check below — is a
    # graph constructed with calc_grad=False.
    g2 = gtn.Graph(False)
    g2.add_node(True)
    g2.add_node()
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0)
    g2.add_arc(1, 2, 1)

    g3 = gtn.Graph()
    g3.add_node(True)
    g3.add_node()
    g3.add_node(False, True)
    g3.add_arc(0, 1, 0)
    g3.add_arc(1, 2, 1)

    gtn.backward(gtn.forward_score(concat([g1, g2, g3])))

    def forward_fn1(g, g2=g2, g3=g3):
        return gtn.forward_score(gtn.concat([g, g2, g3]))

    self.assertTrue(numerical_grad_check(forward_fn1, g1, 1e-4, 1e-3))

    def forward_fn2(g, g1=g1, g2=g2):
        return gtn.forward_score(gtn.concat([g1, g2, g]))

    # NOTE(review): the sibling test_sum_grad checks the *third* graph (g3)
    # for its second forward function; checking g1 here may be a copy-paste
    # slip, though it still exercises g1's accumulated gradient — confirm.
    self.assertTrue(numerical_grad_check(forward_fn2, g1, 1e-4, 1e-3))
    CHECK_THROWS(g2.grad())
def test_parallel_backward(self):
    """Parallel (list) backward matches per-graph backward with the same seed."""
    inputs1 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
    inputs2 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
    outputs = gtn.add(inputs1, inputs2)
    gtn.backward(outputs)

    # Test gradients
    inputs1 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
    inputs2 = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
    outputs = gtn.add(inputs1, inputs2)
    gradIn = gtn.scalar_graph(5.0)
    # Single seed graph and retain flag, presumably broadcast across the
    # list of outputs — TODO confirm against the parallel backward API.
    gtn.backward(outputs, [gradIn], [False])

    # Reference: run the same computation one graph at a time.
    inputs1Dup = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
    inputs2Dup = [gtn.scalar_graph(k) for k in [1.0, 2.0, 3.0]]
    expected = []
    for g1, g2 in zip(inputs1Dup, inputs2Dup):
        expected.append(gtn.add(g1, g2))
    for g in expected:
        gtn.backward(g, gtn.scalar_graph(5.0))

    for i in range(0, len(expected)):
        self.assertTrue(gtn.equal(inputs1[i].grad(), inputs1Dup[i].grad()))
        self.assertTrue(gtn.equal(inputs2[i].grad(), inputs2Dup[i].grad()))
def seq_grad(b):
    # Backpropagate the loss for sample b (per-sample worker body).
    gtn.backward(losses[b])
def backward_single(b):
    # Backpropagate sample b's loss and copy the emission-arc gradients
    # into the torch input gradient buffer as a (1, T, C) frame.
    gtn.backward(losses[b])
    emissions = emissions_graphs[b]
    grad = emissions.grad().weights_to_numpy()
    input_grad[b] = torch.from_numpy(grad).view(1, T, C)
def bwd():
    # Backward through `out` using the list-based (parallel) API;
    # [True] presumably sets retain_graph for the single output — TODO confirm.
    gtn.backward(out, [True])
def main():
    """Train a linear-chain CRF with SGD on synthetic data, then report accuracy.

    Samples a ground-truth model, fits a fresh model with plain SGD on the
    CRF loss, and evaluates Viterbi-decoding accuracy on a held-out set.
    """
    num_features = 3  # number of input features
    num_classes = 2  # number of output classes
    num_train = 1000  # size of the training set
    num_test = 200  # size of the testing set

    # Setup ground-truth model:
    gt_potentials, gt_transitions = gen_model(num_features, num_classes)

    # Sample training and test datasets:
    samples = sample_model(
        num_features, num_classes, gt_potentials, gt_transitions,
        num_train + num_test)
    train, test = samples[:num_train], samples[num_train:]
    print(f"Using {len(train)} samples for the training set")
    print(f"Using {len(test)} samples for the test set")

    # Make the graphs to be learned (gradients enabled, no initialization).
    # BUG FIX: the original called gen_model(...) twice with identical
    # arguments back to back; the second call discarded the first result,
    # so the duplicate was removed.
    potentials, transitions = gen_model(
        num_features, num_classes, calc_grad=True, init=False)
    print("Unary potential graph has {} nodes and {} arcs".format(
        potentials.num_nodes(), potentials.num_arcs()))
    print("Transition graph has {} nodes and {} arcs".format(
        transitions.num_nodes(), transitions.num_arcs()))

    # Run the SGD loop:
    learning_rate = 1e-2
    max_iter = 10000
    losses = []
    for it, (X, Y) in enumerate(sampler(train)):
        # Compute the loss and take a gradient step:
        loss = crf_loss(X, Y, potentials, transitions)
        gtn.backward(loss)
        update_params(-learning_rate, potentials, transitions)
        # Clear the gradients:
        transitions.zero_grad()
        potentials.zero_grad()
        losses.append(loss.item())
        if (it + 1) % 1000 == 0:
            print("=" * 50)
            print(f"Iteration {it + 1}, Avg. Loss {np.mean(losses):.3f}")
            losses = []
        if it == max_iter:
            break

    # Evaluate on the test set:
    correct = 0.0
    total = 0
    for X, Y in test:
        full_graph = gtn.compose(gtn.compose(X, potentials), transitions)
        prediction = gtn.viterbi_path(full_graph).labels_to_list(False)
        correct += np.sum(np.array(Y.labels_to_list()) == prediction)
        total += len(prediction)
    print("Test: Accuracy {:.3f}".format(correct / total))
def test_forward_score_grad(self):
    """Gradients of forward_score: numerical checks plus closed-form and
    infinity edge cases."""
    g = gtn.Graph()
    g.add_node(True)
    g.add_node()
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 1)
    g.add_arc(0, 1, 1, 1, 2)
    g.add_arc(0, 1, 2, 2, 3)
    g.add_arc(1, 2, 0, 0, 1)
    g.add_arc(1, 2, 1, 1, 2)
    g.add_arc(1, 2, 2, 2, 3)
    gtn.backward(gtn.forward_score(g))
    self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))

    # Handle two start nodes
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, -5)
    g.add_arc(0, 2, 0, 0, 1)
    g.add_arc(1, 2, 0, 0, 2)
    gtn.backward(gtn.forward_score(g))
    self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))
    # Closed-form softmax over the three path scores {-3, 1, 2}.
    denom = 1 / (math.exp(-3) + math.exp(1) + math.exp(2))
    grad = g.grad()
    grad_weights = grad.weights_to_list()
    self.assertAlmostEqual(grad_weights[0], (denom * math.exp(-3)))
    self.assertAlmostEqual(grad_weights[1], (denom * math.exp(1)))
    self.assertAlmostEqual(grad_weights[2],
                           (denom * (math.exp(-3) + math.exp(2))))

    # Handle two accept nodes
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(False, True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 2)
    g.add_arc(0, 2, 0, 0, 2)
    g.add_arc(1, 2, 0, 0, 2)
    gtn.backward(gtn.forward_score(g))
    self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))
    denom = 1 / (2 * math.exp(2) + math.exp(4))
    grad = g.grad()
    grad_weights = grad.weights_to_list()
    self.assertAlmostEqual(grad_weights[0],
                           (denom * (math.exp(2) + math.exp(4))), places=5)
    self.assertAlmostEqual(grad_weights[1], (denom * math.exp(2)), places=5)
    self.assertAlmostEqual(grad_weights[2], (denom * math.exp(4)), places=5)

    # Handle case where some arcs don't lead to accepting states
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(False, False)  # dead end: not accepting
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 2)
    g.add_arc(0, 2, 0, 0, 2)
    gtn.backward(gtn.forward_score(g))
    self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))
    grad = g.grad()
    grad_weights = grad.weights_to_list()
    self.assertAlmostEqual(grad_weights[0], (0.0))
    self.assertAlmostEqual(grad_weights[1], (1.0))

    # Handles negative infinity
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, -math.inf)
    g.add_arc(0, 1, 1, 1, -math.inf)
    gtn.backward(gtn.forward_score(g))
    grad = g.grad()
    grad_weights = grad.weights_to_list()
    self.assertTrue(math.isnan(grad_weights[0]))
    self.assertTrue(math.isnan(grad_weights[1]))

    g2 = gtn.Graph()
    g2.add_node(True)
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0, 0, -math.inf)
    g2.add_arc(0, 1, 1, 1, 1.0)
    gtn.backward(gtn.forward_score(g2))
    grad2 = g2.grad()
    grad_weights = grad2.weights_to_list()
    self.assertAlmostEqual(grad_weights[0], (0.0))
    self.assertAlmostEqual(grad_weights[1], (1.0))

    # Handles infinity
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, math.inf)
    g.add_arc(0, 1, 1, 1, math.inf)
    gtn.backward(gtn.forward_score(g))
    grad = g.grad()
    grad_weights = grad.weights_to_list()
    self.assertTrue(math.isnan(grad_weights[0]))
    self.assertTrue(math.isnan(grad_weights[1]))

    g2 = gtn.Graph()
    g2.add_node(True)
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0, 0, math.inf)
    g2.add_arc(0, 1, 1, 1, 1.0)
    gtn.backward(gtn.forward_score(g2))
    grad2 = g2.grad()
    # BUG FIX: the original read `grad.weights_to_list()`, re-checking the
    # PREVIOUS all-inf graph's gradient — g2's gradient was never tested.
    grad_weights = grad2.weights_to_list()
    self.assertTrue(math.isnan(grad_weights[0]))
    # NOTE(review): with one +inf arc, the finite arc's gradient could
    # plausibly be 0.0 rather than NaN (cf. the -inf case above, which
    # yields 0.0/1.0) — confirm the intended expectation against gtn.
    self.assertTrue(math.isnan(grad_weights[1]))

    # A more complex test case
    g_str = [
        "0 1",
        "3 4",
        "0 1 0 0 2",
        "0 2 1 1 1",
        "1 2 0 0 2",
        "2 3 0 0 1",
        "2 3 1 1 1",
        "1 4 0 0 2",
        "2 4 1 1 3",
        "3 4 0 0 2",
    ]
    g = create_graph_from_text(g_str)
    gtn.backward(gtn.forward_score(g))
    self.assertTrue(numerical_grad_check(gtn.forward_score, g, 1e-3, 1e-3))
def test_sample_grad(self):
    """Gradient of a sampled path counts each original arc's uses in the path."""
    g = gtn.Graph()
    g.add_node(True)
    g.add_node()
    g.add_node(False, True)
    g.add_arc(0, 0, 0)
    g.add_arc(0, 1, 1)
    g.add_arc(1, 0, 2)
    g.add_arc(1, 2, 3)
    for i in range(5):
        g.zero_grad()
        path = gtn.sample(g)
        # One for each arc in the original graph
        grads = [0.0, 0.0, 0.0, 0.0]
        path_labels = path.labels_to_list()
        for a in range(path.num_arcs()):
            grads[path_labels[a]] += 1
        gtn.backward(path)
        self.assertTrue(grads == g.grad().weights_to_list())

def test_sum_grad(self):
    """Numerically check gradients through union, with one no-grad input."""
    g1 = gtn.Graph()
    g1.add_node(True)
    g1.add_node()
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0)
    g1.add_arc(1, 2, 1)

    # Works with a no gradient graph
    # BUG FIX: was `gtn.Graph()(False)`, which constructs a Graph and then
    # calls the instance with False (TypeError). Intended: calc_grad=False.
    g2 = gtn.Graph(False)
    g2.add_node(True)
    g2.add_node()
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0)
    g2.add_arc(1, 2, 1)

    g3 = gtn.Graph()
    g3.add_node(True)
    g3.add_node()
    g3.add_node(False, True)
    g3.add_arc(0, 1, 0)
    g3.add_arc(1, 2, 1)

    gtn.backward(gtn.forward_score(gtn.union([g1, g2, g3])))

    def forward_fn1(g, g2=g2, g3=g3):
        return gtn.forward_score(gtn.union([g, g2, g3]))

    self.assertTrue(numerical_grad_check(forward_fn1, g1, 1e-4, 1e-3))

    def forward_fn2(g, g1=g1, g2=g2):
        return gtn.forward_score(gtn.union([g1, g2, g]))

    self.assertTrue(numerical_grad_check(forward_fn2, g3, 1e-4, 1e-3))
    CHECK_THROWS(g2.grad())

def test_concat_grad(self):
    """Numerically check gradients through concat, with one no-grad input."""
    g1 = gtn.Graph()
    g1.add_node(True)
    g1.add_node()
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0)
    g1.add_arc(1, 2, 1)

    # Works with a no gradient graph
    # BUG FIX: was `gtn.Graph()(False)` — see test_sum_grad above.
    g2 = gtn.Graph(False)
    g2.add_node(True)
    g2.add_node()
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0)
    g2.add_arc(1, 2, 1)

    g3 = gtn.Graph()
    g3.add_node(True)
    g3.add_node()
    g3.add_node(False, True)
    g3.add_arc(0, 1, 0)
    g3.add_arc(1, 2, 1)

    gtn.backward(gtn.forward_score(concat([g1, g2, g3])))

    def forward_fn1(g, g2=g2, g3=g3):
        return gtn.forward_score(gtn.concat([g, g2, g3]))

    self.assertTrue(numerical_grad_check(forward_fn1, g1, 1e-4, 1e-3))

    def forward_fn2(g, g1=g1, g2=g2):
        return gtn.forward_score(gtn.concat([g1, g2, g]))

    # NOTE(review): test_sum_grad checks g3 for its second forward function;
    # checking g1 here may be a copy-paste slip — confirm.
    self.assertTrue(numerical_grad_check(forward_fn2, g1, 1e-4, 1e-3))
    CHECK_THROWS(g2.grad())

def test_closure_grad(self):
    """Numerically check gradients through closure(g1) composed with a chain."""
    g1 = gtn.Graph()
    g1.add_node(True)
    g1.add_node(False, True)
    g1.add_arc(0, 1, 0, 0, 1.3)
    g1.add_arc(1, 1, 1, 1, 2.1)

    g2 = gtn.Graph()
    g2.add_node(True)
    g2.add_node()
    g2.add_node()
    g2.add_node()
    g2.add_node(False, True)
    g2.add_arc(0, 1, 0)
    g2.add_arc(0, 1, 1)
    g2.add_arc(1, 2, 0)
    g2.add_arc(1, 2, 1)
    g2.add_arc(2, 3, 0)
    g2.add_arc(2, 3, 1)
    g2.add_arc(3, 4, 0)
    g2.add_arc(3, 4, 1)

    gtn.backward(gtn.forward_score(gtn.compose(closure(g1), g2)))

    def forward_fn(g, g2=g2):
        return gtn.forward_score(gtn.compose(closure(g), g2))

    self.assertTrue(numerical_grad_check(forward_fn, g1, 1e-3, 1e-3))
def test_ctc_criterion(self):
    """CTC loss and gradients against reference values from wav2letter and TF."""
    # These test cases are taken from wav2letter: https://fburl.com/msom2e4v

    # Test case 1
    ctc = ctc_graph([0, 0], 1)
    emissions = emissions_graph([1.0, 0.0, 0.0, 1.0, 1.0, 0.0], 3, 2)
    loss = gtn.forward_score(gtn.compose(ctc, emissions))
    self.assertEqual(loss.item(), 0.0)
    # Should be 0 since scores are normalized
    z = gtn.forward_score(emissions)
    self.assertEqual(z.item(), 0.0)

    # Test case 2
    T = 3
    N = 4
    ctc = ctc_graph([1, 2], N - 1)
    emissions = emissions_graph([1.0] * (T * N), T, N)
    # 5 valid alignments of [1, 2] into 3 frames, each with prob 0.25^3.
    expected_loss = -math.log(0.25 * 0.25 * 0.25 * 5)
    loss = gtn.subtract(gtn.forward_score(gtn.compose(ctc, emissions)),
                        gtn.forward_score(emissions))
    self.assertAlmostEqual(-loss.item(), expected_loss)

    # Test case 3
    T = 5
    N = 6
    target = [0, 1, 2, 1, 0]
    # generate CTC graph
    ctc = ctc_graph(target, N - 1)
    # fmt: off
    emissions_vec = [
        0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
        0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436,
        0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688,
        0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533,
        0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107,
    ]
    # fmt: on
    emissions = emissions_graph(emissions_vec, T, N)
    # The log probabilities are already normalized,
    # so this should be close to 0
    z = gtn.forward_score(emissions)
    self.assertTrue(abs(z.item()) < 1e-5)
    loss = gtn.subtract(z, gtn.forward_score(gtn.compose(ctc, emissions)))
    expected_loss = 3.34211
    self.assertAlmostEqual(loss.item(), expected_loss, places=5)

    # Check the gradients
    gtn.backward(loss)
    # fmt: off
    expected_grad = [
        -0.366234, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
        0.111121, -0.411608, 0.278779, 0.0055756, 0.00569609, 0.010436,
        0.0357786, 0.633813, -0.678582, 0.00249248, 0.00272882, 0.0037688,
        0.0663296, -0.356151, 0.280111, 0.00283995, 0.0035545, 0.00331533,
        -0.541765, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107
    ]
    # fmt: on
    all_close = True
    grad = emissions.grad()
    grad_weights = grad.weights_to_list()
    for i in range(T * N):
        g = grad_weights[i]
        all_close = all_close and (abs(expected_grad[i] - g) < 1e-5)
    self.assertTrue(all_close)

    # Test case 4
    # This test case is taken from Tensor Flow CTC implementation
    # tinyurl.com/y9du5v5a
    T = 5
    N = 6
    target = [0, 1, 1, 0]
    # generate CTC graph
    ctc = ctc_graph(target, N - 1)
    # fmt: off
    emissions_vec = [
        0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508,
        0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549,
        0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456,
        0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345,
        0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046,
    ]
    # fmt: on
    emissions = emissions_graph(emissions_vec, T, N)
    # The log probabilities are already normalized,
    # so this should be close to 0
    z = gtn.forward_score(emissions)
    self.assertTrue(abs(z.item()) < 1e-5)
    loss = gtn.subtract(z, gtn.forward_score(gtn.compose(ctc, emissions)))
    expected_loss = 5.42262
    self.assertAlmostEqual(loss.item(), expected_loss, places=4)

    # Check the gradients
    gtn.backward(loss)
    # fmt: off
    expected_grad = [
        -0.69824, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508,
        0.24082, -0.602467, 0.0557226, 0.0546814, 0.0557528, 0.19549,
        0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, -0.797544,
        0.280884, -0.570478, 0.0326593, 0.0339046, 0.0326856, 0.190345,
        -0.576714, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046,
    ]
    # fmt: on
    all_close = True
    grad = emissions.grad()
    grad_weights = grad.weights_to_list()
    for i in range(T * N):
        g = grad_weights[i]
        all_close = all_close and (abs(expected_grad[i] - g) < 1e-5)
    self.assertTrue(all_close)
def test_asg_criterion(self):
    """ASG loss and gradients against reference values from wav2letter."""
    # This test case is taken from wav2letter: https://fburl.com/msom2e4v
    T = 5
    N = 6
    # fmt: off
    targets = [
        [2, 1, 5, 1, 3],
        [4, 3, 5],
        [3, 2, 2, 1],
    ]
    expected_loss = [
        7.7417464256287,
        6.4200420379639,
        8.2780694961548,
    ]
    emissions_vecs = [
        [
            -0.4340, -0.0254, 0.3667, 0.4180, -0.3805, -0.1707,
            0.1060, 0.3631, -0.1122, -0.3825, -0.0031, -0.3801,
            0.0443, -0.3795, 0.3194, -0.3130, 0.0094, 0.1560,
            0.1252, 0.2877, 0.1997, -0.4554, 0.2774, -0.2526,
            -0.4001, -0.2402, 0.1295, 0.0172, 0.1805, -0.3299
        ],
        [
            0.3298, -0.2259, -0.0959, 0.4909, 0.2996, -0.2543,
            -0.2863, 0.3239, -0.3988, 0.0732, -0.2107, -0.4739,
            -0.0906, 0.0480, -0.1301, 0.3975, -0.3317, -0.1967,
            0.4372, -0.2006, 0.0094, 0.3281, 0.1873, -0.2945,
            0.2399, 0.0320, -0.3768, -0.2849, -0.2248, 0.3186,
        ],
        [
            0.0225, -0.3867, -0.1929, -0.2904, -0.4958, -0.2533,
            0.4001, -0.1517, -0.2799, -0.2915, 0.4198, 0.4506,
            0.1446, -0.4753, -0.0711, 0.2876, -0.1851, -0.1066,
            0.2081, -0.1190, -0.3902, -0.1668, 0.1911, -0.2848,
            -0.3846, 0.1175, 0.1052, 0.2172, -0.0362, 0.3055,
        ],
    ]
    emissions_grads = [
        [
            0.1060, 0.1595, -0.7639, 0.2485, 0.1118, 0.1380,
            0.1915, -0.7524, 0.1539, 0.1175, 0.1717, 0.1178,
            0.1738, 0.1137, 0.2288, 0.1216, 0.1678, -0.8057,
            0.1766, -0.7923, 0.1902, 0.0988, 0.2056, 0.1210,
            0.1212, 0.1422, 0.2059, -0.8160, 0.2166, 0.1300,
        ],
        [
            0.2029, 0.1164, 0.1325, 0.2383, -0.8032, 0.1131,
            0.1414, 0.2602, 0.1263, -0.3441, -0.3009, 0.1172,
            0.1557, 0.1788, 0.1496, -0.5498, 0.0140, 0.0516,
            0.2306, 0.1219, 0.1503, -0.4244, 0.1796, -0.2579,
            0.2149, 0.1745, 0.1160, 0.1271, 0.1350, -0.7675,
        ],
        [
            0.2195, 0.1458, 0.1770, -0.8395, 0.1307, 0.1666,
            0.2148, 0.1237, -0.6613, -0.1223, 0.2191, 0.2259,
            0.2002, 0.1077, -0.8386, 0.2310, 0.1440, 0.1557,
            0.2197, -0.1466, -0.5742, 0.1510, 0.2160, 0.1342,
            0.1050, -0.8265, 0.1714, 0.1917, 0.1488, 0.2094,
        ],
    ]
    # fmt: on

    # Bigram transition graph: node 0 is the start state, nodes 1..N are
    # the classes; the first N arcs are start transitions p(i | <s>).
    transitions = gtn.Graph()
    transitions.add_node(True)
    for i in range(1, N + 1):
        transitions.add_node(False, True)
        transitions.add_arc(0, i, i - 1)  # p(i | <s>)
    for i in range(N):
        for j in range(N):
            transitions.add_arc(j + 1, i + 1, i)  # p(i | j)

    for b in range(len(targets)):
        target = targets[b]
        emissions_vec = emissions_vecs[b]
        emissions_grad = emissions_grads[b]

        # Force-alignment graph for the target, with self-loops so each
        # label may span multiple frames.
        fal = gtn.Graph()
        fal.add_node(True)
        for l in range(1, len(target) + 1):
            fal.add_node(False, l == len(target))
            fal.add_arc(l - 1, l, target[l - 1])
            fal.add_arc(l, l, target[l - 1])

        emissions = emissions_graph(emissions_vec, T, N, True)

        # ASG loss = log Z(all paths) - log-score of forced alignments.
        loss = gtn.subtract(
            gtn.forward_score(gtn.compose(emissions, transitions)),
            gtn.forward_score(
                gtn.compose(gtn.compose(fal, transitions), emissions)),
        )
        self.assertAlmostEqual(loss.item(), expected_loss[b], places=3)

        # Check the gradients
        gtn.backward(loss)

        all_close = True
        grad = emissions.grad()
        grad_weights = grad.weights_to_list()
        for i in range(T * N):
            g = grad_weights[i]
            all_close = all_close and (abs(emissions_grad[i] - g) < 1e-4)
        self.assertTrue(all_close)

        all_close = True
        # fmt: off
        trans_grad = [
            0.3990, 0.3396, 0.3486, 0.3922, 0.3504, 0.3155,
            0.3666, 0.0116, -1.6678, 0.3737, 0.3361, -0.7152,
            0.3468, 0.3163, -1.1583, -0.6803, 0.3216, 0.2722,
            0.3694, -0.6688, 0.3047, -0.8531, -0.6571, 0.2870,
            0.3866, 0.3321, 0.3447, 0.3664, -0.2163, 0.3039,
            0.3640, -0.6943, 0.2988, -0.6722, 0.3215, -0.1860,
        ]
        # fmt: on
        grad = transitions.grad()
        grad_weights = grad.weights_to_list()
        for i in range(N * N):
            # Skip the first N start-transition arcs; compare bigram arcs only.
            g = grad_weights[i + N]
            all_close = all_close and (abs(trans_grad[i] - g) < 1e-4)
        self.assertTrue(all_close)