def forward_fn(g):
    """Union three identical Viterbi paths of ``g`` and return the forward score."""
    best_paths = [gtn.viterbi_path(g) for _ in range(3)]
    return gtn.forward_score(gtn.union(best_paths))
def sample_model(
        num_features, num_classes, potentials, transitions, num_samples,
        max_len=20):
    """
    Sample `num_samples` from a linear-chain CRF specified by a `potentials`
    graph and a `transitions` graph. The samples will have a random length
    in `[1, max_len]`.
    """
    model = gtn.compose(potentials, transitions)

    samples = []
    # Keep drawing random inputs until we have enough (X, Y) pairs.
    while len(samples) < num_samples:
        # Draw a random input X with length uniformly in [1, max_len]:
        seq_len = np.random.randint(1, max_len + 1)
        feature_ids = np.random.randint(0, num_features, size=(seq_len,))
        X = make_chain_graph(feature_ids)

        # Decode the most likely label sequence Y for this X:
        Y = gtn.project_output(gtn.viterbi_path(gtn.compose(X, model)))
        # Strip the scores so Y is a plain label chain:
        Y.set_weights(np.zeros(Y.num_arcs()))

        samples.append((X, Y))
    return samples
def test_viterbi_path_grad(self):
    """Gradients through the Viterbi path mark exactly the arcs on the best path."""
    graph_lines = [
        "0 1",
        "3 4",
        "0 1 0 0 2",
        "0 2 1 1 1",
        "1 2 0 0 2",
        "2 3 0 0 1",
        "2 3 1 1 3",
        "1 4 0 0 2",
        "2 4 1 1 3",
        "3 4 0 0 2",
    ]
    g = create_graph_from_text(graph_lines)

    gtn.backward(gtn.viterbi_path(g))
    # Arcs on the single best path get gradient 1, every other arc gets 0.
    expected = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0]
    self.assertEqual(g.grad().weights_to_list(), expected)

    g.zero_grad()

    def score_fn(g):
        # Union of three identical Viterbi paths reduced to a scalar score,
        # so the gradient can be checked numerically.
        best_paths = [gtn.viterbi_path(g) for _ in range(3)]
        return gtn.forward_score(gtn.union(best_paths))

    gtn.backward(score_fn(g))
    self.assertTrue(numerical_grad_check(score_fn, g, 1e-3, 1e-5))
def process(b):
    """Decode the label sequence for batch element ``b`` (writes paths[b])."""
    # Emission graph whose weights alias the network output tensor for this
    # batch element.
    emissions = gtn.linear_graph(T, C, False)
    weights = outputs[b].cpu().contiguous()
    emissions.set_weights(weights.data_ptr())

    if self.transitions is None:
        full_graph = emissions
    else:
        full_graph = gtn.intersect(emissions, self.transitions)

    # Find the best path and remove back-off arcs:
    best = gtn.remove(gtn.viterbi_path(full_graph))
    # Left compose the viterbi path with the "alignment to token"
    # transducer to get the outputs:
    best = gtn.compose(best, self.tokens)
    # When there are ambiguous paths (allow_repeats is true), we take
    # the shortest:
    best = gtn.viterbi_path(best)
    best = gtn.remove(gtn.project_output(best))
    paths[b] = best.labels_to_list()
def pred_seq(batch_index):
    """Decode the best output sequence for one batch element (writes best_paths[batch_index])."""
    # Build the observation lattice from this element's arc scores.
    obs_fst = linearFstFromArray(
        arc_scores[batch_index].reshape(num_samples, -1))

    # Compose each sequence fst individually: it seems like composition
    # only works for lattices
    denom_fst = obs_fst
    for fst in seq_fsts:
        denom_fst = gtn.compose(denom_fst, fst)

    # Viterbi decode, then keep only the output labels.
    best = gtn.viterbi_path(denom_fst)
    best_paths[batch_index] = gtn.remove(gtn.project_output(best))
def test_asg_viterbi_path(self):
    """ASG-style Viterbi decode matches the wav2letter reference path."""
    # Test adapted from wav2letter https://tinyurl.com/yc6nxex9
    T = 4  # number of time steps
    N = 3  # number of labels
    # fmt: off
    emission_scores = [
        0, 0, 7,
        5, 4, 3,
        5, 8, 5,
        5, 4, 3,
    ]
    transition_scores = [
        0, 2, 0,
        0, 0, 2,
        2, 0, 0,
    ]
    expected_path = [2, 1, 1, 0]
    # fmt: on

    transitions = gtn.Graph()
    transitions.add_node(True)  # start state
    for label in range(1, N + 1):
        transitions.add_node(False, True)
        transitions.add_arc(0, label, label - 1)  # p(i | <s>)
    for dst in range(N):
        for src in range(N):
            # p(dst | src)
            transitions.add_arc(
                src + 1, dst + 1, dst, dst, transition_scores[dst * N + src])

    emissions = emissions_graph(emission_scores, T, N, True)
    path = gtn.viterbi_path(gtn.compose(emissions, transitions))
    self.assertEqual(path.labels_to_list(), expected_path)
def process(b):
    """Decode ASG predictions for batch element ``b`` (writes predictions[b])."""
    # Emission graph whose weights alias the network output tensor.
    g_emissions = gtn.linear_graph(T, C, False)
    weights = outputs[b].cpu().contiguous()
    g_emissions.set_weights(weights.data_ptr())

    # Transition graph built from the ASG transition parameters.
    g_transitions = utils.ASGLossFunction.create_transitions_graph(
        self.transitions)

    # Best path through emissions intersected with transitions, collapsed
    # over consecutive repeated labels.
    best = gtn.viterbi_path(gtn.intersect(g_emissions, g_transitions))
    collapsed = [label for label, _ in groupby(best.labels_to_list())]

    if self.garbage_idx is not None:
        # remove garbage tokens
        collapsed = [
            label for label in collapsed if label != self.garbage_idx
        ]

    predictions[b] = utils.unpack_replabels(collapsed, self.num_replabels)
def main():
    """Train a linear-chain CRF by SGD on data sampled from a ground-truth
    model, then report accuracy on a held-out test set."""
    num_features = 3  # number of input features
    num_classes = 2  # number of output classes
    num_train = 1000  # size of the training set
    num_test = 200  # size of the testing set

    # Setup ground-truth model:
    gt_potentials, gt_transitions = gen_model(num_features, num_classes)

    # Sample training and test datasets:
    samples = sample_model(
        num_features, num_classes,
        gt_potentials, gt_transitions,
        num_train + num_test)
    train, test = samples[:num_train], samples[num_train:]
    print(f"Using {len(train)} samples for the training set")
    print(f"Using {len(test)} samples for the test set")

    # Make the graphs to be learned (gradients enabled, uninitialized).
    # NOTE: the original code called gen_model a second time with identical
    # arguments, discarding the graphs whose sizes were just printed; the
    # redundant call has been removed.
    potentials, transitions = gen_model(
        num_features, num_classes, calc_grad=True, init=False)
    print("Unary potential graph has {} nodes and {} arcs".format(
        potentials.num_nodes(), potentials.num_arcs()))
    print("Transition graph has {} nodes and {} arcs".format(
        transitions.num_nodes(), transitions.num_arcs()))

    # Run the SGD loop:
    learning_rate = 1e-2
    max_iter = 10000
    losses = []
    for it, (X, Y) in enumerate(sampler(train)):
        # Compute the loss and take a gradient step:
        loss = crf_loss(X, Y, potentials, transitions)
        gtn.backward(loss)
        update_params(-learning_rate, potentials, transitions)
        # Clear the gradients:
        transitions.zero_grad()
        potentials.zero_grad()
        losses.append(loss.item())
        if (it + 1) % 1000 == 0:
            print("=" * 50)
            print(f"Iteration {it + 1}, Avg. Loss {np.mean(losses):.3f}")
            losses = []
        # Stop after exactly max_iter iterations (the original condition
        # `it == max_iter` ran one extra step).
        if it + 1 == max_iter:
            break

    # Evaluate on the test set:
    correct = 0.0
    total = 0
    for X, Y in test:
        full_graph = gtn.compose(gtn.compose(X, potentials), transitions)
        prediction = gtn.viterbi_path(full_graph).labels_to_list(False)
        correct += np.sum(np.array(Y.labels_to_list()) == prediction)
        total += len(prediction)
    print("Test: Accuracy {:.3f}".format(correct / total))
def main(
        out_dir=None, gpu_dev_id=None, num_samples=10, random_seed=None,
        learning_rate=1e-3, num_epochs=500,
        dataset_kwargs=None, dataloader_kwargs=None, model_kwargs=None):
    """Train a lattice CRF on simulated sequences and save diagnostic figures.

    Args:
        out_dir: Output directory; defaults to ~/data/output/seqtools/test_gtn.
        gpu_dev_id: Unused here; kept for interface compatibility.
        num_samples: Number of sequences to simulate.
        random_seed: Unused here; kept for interface compatibility.
        learning_rate: SGD step size.
        num_epochs: Number of training epochs.
        dataset_kwargs, dataloader_kwargs, model_kwargs: Optional keyword
            arguments forwarded to the dataset / data loader / model.
            ``None`` means empty (avoids the mutable-default-argument bug
            present in the original signature).
    """
    # Replace the original mutable default arguments ({}) with None
    # sentinels; a shared dict default persists across calls.
    dataset_kwargs = {} if dataset_kwargs is None else dataset_kwargs
    dataloader_kwargs = {} if dataloader_kwargs is None else dataloader_kwargs
    model_kwargs = {} if model_kwargs is None else model_kwargs

    if out_dir is None:
        out_dir = os.path.join('~', 'data', 'output', 'seqtools', 'test_gtn')
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    vocabulary = ['a', 'b', 'c', 'd', 'e']

    # Ground-truth sequence model used only to simulate data.
    transition = np.array(
        [[0, 1, 0, 0, 0],
         [0, 0, 1, 1, 0],
         [0, 0, 0, 0, 1],
         [0, 1, 0, 0, 1],
         [0, 0, 0, 0, 0]], dtype=float)
    initial = np.array([1, 0, 1, 0, 0], dtype=float)
    final = np.array([0, 1, 0, 0, 1], dtype=float) / 10
    seq_params = (transition, initial, final)

    simulated_dataset = simulate(num_samples, *seq_params)
    label_seqs, obsv_seqs = tuple(zip(*simulated_dataset))
    seq_params = tuple(map(lambda x: -np.log(x), seq_params))

    dataset = torchutils.SequenceDataset(
        obsv_seqs, label_seqs, **dataset_kwargs)
    data_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs)
    # Same loader for train and validation in this smoke test.
    train_loader = data_loader
    val_loader = data_loader

    transition_weights = torch.tensor(transition, dtype=torch.float).log()
    initial_weights = torch.tensor(initial, dtype=torch.float).log()
    final_weights = torch.tensor(final, dtype=torch.float).log()

    model = libfst.LatticeCrf(
        vocabulary,
        transition_weights=transition_weights,
        initial_weights=initial_weights,
        final_weights=final_weights,
        debug_output_dir=fig_dir,
        **model_kwargs)

    gtn.draw(
        model._transition_fst,
        os.path.join(fig_dir, 'transitions-init.png'),
        isymbols=model._arc_symbols, osymbols=model._arc_symbols)
    gtn.draw(
        model._duration_fst,
        os.path.join(fig_dir, 'durations-init.png'),
        isymbols=model._arc_symbols, osymbols=model._arc_symbols)

    # Sanity-check the loss on the raw (untrained) model, drawing
    # diagnostics whenever the loss degenerates to infinity.
    for i, (inputs, targets, seq_id) in enumerate(train_loader):
        arc_scores = model.scores_to_arc(inputs)
        arc_labels = model.labels_to_arc(targets)
        # NOTE: the original unpack shadowed the `num_samples` parameter;
        # a distinct name is used here.
        batch_size, seq_len, num_classes = arc_scores.shape

        obs_fst = libfst.linearFstFromArray(
            arc_scores[0].reshape(seq_len, -1))
        gt_fst = libfst.fromSequence(arc_labels[0])

        d1_fst = gtn.compose(obs_fst, model._duration_fst)
        d1_fst = gtn.project_output(d1_fst)
        denom_fst = gtn.compose(d1_fst, model._transition_fst)
        # denom_fst = gtn.project_output(denom_fst)
        num_fst = gtn.compose(denom_fst, gt_fst)

        viterbi_fst = gtn.viterbi_path(denom_fst)
        pred_fst = gtn.remove(gtn.project_output(viterbi_fst))

        loss = gtn.subtract(
            gtn.forward_score(num_fst), gtn.forward_score(denom_fst))
        loss = torch.tensor(loss.item())

        if torch.isinf(loss).any():
            # Rebuild the denominator from a minimized d1 lattice and dump
            # drawings of every intermediate FST for inspection.
            # (The original computed denom_alt from obs_fst first and then
            # immediately overwrote it; the dead assignment is removed.)
            d1_min = gtn.remove(gtn.project_output(d1_fst))
            denom_alt = gtn.compose(d1_min, model._transition_fst)
            num_alt = gtn.compose(denom_alt, gt_fst)
            gtn.draw(
                obs_fst, os.path.join(fig_dir, 'observations-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                gt_fst, os.path.join(fig_dir, 'labels-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                d1_fst, os.path.join(fig_dir, 'd1-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                d1_min, os.path.join(fig_dir, 'd1-min-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                denom_fst, os.path.join(fig_dir, 'denominator-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                denom_alt,
                os.path.join(fig_dir, 'denominator-alt-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                num_fst, os.path.join(fig_dir, 'numerator-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                num_alt, os.path.join(fig_dir, 'numerator-alt-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                viterbi_fst, os.path.join(fig_dir, 'viterbi-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            gtn.draw(
                pred_fst, os.path.join(fig_dir, 'pred-init.png'),
                isymbols=model._arc_symbols, osymbols=model._arc_symbols)
            # Leftover interactive breakpoint (import pdb; pdb.set_trace())
            # removed so the script can run unattended.

    # Train the model
    train_epoch_log = collections.defaultdict(list)
    val_epoch_log = collections.defaultdict(list)
    metric_dict = {
        'Avg Loss': metrics.AverageLoss(),
        'Accuracy': metrics.Accuracy()
    }

    criterion = model.nllLoss
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=1, gamma=1.00)

    model, last_model_wts = torchutils.trainModel(
        model, criterion, optimizer, scheduler,
        train_loader, val_loader,
        metrics=metric_dict, test_metric='Avg Loss',
        train_epoch_log=train_epoch_log, val_epoch_log=val_epoch_log,
        num_epochs=num_epochs)

    gtn.draw(
        model._transition_fst,
        os.path.join(fig_dir, 'transitions-trained.png'),
        isymbols=model._arc_symbols, osymbols=model._arc_symbols)
    gtn.draw(
        model._duration_fst,
        os.path.join(fig_dir, 'durations-trained.png'),
        isymbols=model._arc_symbols, osymbols=model._arc_symbols)

    torchutils.plotEpochLog(
        train_epoch_log, title="Train Epoch Log",
        fn=os.path.join(fig_dir, "train-log.png"))
def test_viterbi_path(self):
    """Exercise gtn.viterbi_path across degenerate and ambiguous graphs.

    Each case checks that the returned path is equivalent to a known best
    path and that its Viterbi score equals the Viterbi score of the
    original graph.
    """
    g = gtn.Graph()

    # Empty graph gives empty path
    self.assertTrue(gtn.equal(gtn.viterbi_path(g), g))

    # Accepting empty string
    g.add_node(True, True)
    self.assertTrue(gtn.equal(gtn.viterbi_path(g), g))

    # A simple test case: two transitions, three parallel arcs each; the
    # best path takes the weight-3 arc at both steps.
    g = gtn.Graph()
    g.add_node(True)
    g.add_node()
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 1)
    g.add_arc(0, 1, 1, 1, 2)
    g.add_arc(0, 1, 2, 2, 3)
    g.add_arc(1, 2, 0, 0, 1)
    g.add_arc(1, 2, 1, 1, 2)
    g.add_arc(1, 2, 2, 2, 3)

    best = gtn.Graph()
    best.add_node(True)
    best.add_node()
    best.add_node(False, True)
    best.add_arc(0, 1, 2, 2, 3)
    best.add_arc(1, 2, 2, 2, 3)

    path = gtn.viterbi_path(g)
    self.assertTrue(gtn.rand_equivalent(path, best))
    self.assertEqual(gtn.viterbi_score(path).item(),
                     gtn.viterbi_score(g).item())

    # Handle a single node.
    g = gtn.Graph()
    g.add_node(True, True)

    best = gtn.Graph()
    best.add_node(True, True)

    path = gtn.viterbi_path(g)
    self.assertTrue(gtn.rand_equivalent(path, best))
    self.assertEqual(gtn.viterbi_score(path).item(),
                     gtn.viterbi_score(g).item())

    # Handle two start nodes: the path starting at node 1 (weight 2)
    # beats the ones through node 0 (-5 + 2, or 1).
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, -5)
    g.add_arc(0, 2, 0, 0, 1)
    g.add_arc(1, 2, 0, 0, 2)

    best = gtn.Graph()
    best.add_node(True)
    best.add_node(False, True)
    best.add_arc(0, 1, 0, 0, 2)

    path = gtn.viterbi_path(g)
    self.assertTrue(gtn.rand_equivalent(path, best))
    self.assertEqual(gtn.viterbi_score(path).item(),
                     gtn.viterbi_score(g).item())

    # Handle two accept nodes: the two-arc path (3 + 2) beats stopping
    # early at either accept node.
    g = gtn.Graph()
    g.add_node(True)
    g.add_node(False, True)
    g.add_node(False, True)
    g.add_arc(0, 1, 0, 0, 3)
    g.add_arc(0, 2, 0, 0, 2)
    g.add_arc(1, 2, 0, 0, 2)

    best = gtn.Graph()
    best.add_node(True)
    best.add_node()
    best.add_node(False, True)
    best.add_arc(0, 1, 0, 0, 3)
    best.add_arc(1, 2, 0, 0, 2)

    path = gtn.viterbi_path(g)
    self.assertTrue(gtn.rand_equivalent(path, best))
    self.assertEqual(gtn.viterbi_score(path).item(),
                     gtn.viterbi_score(g).item())

    # A more complex test case with ties: three distinct paths share the
    # maximum score of 7.
    g_str = [
        "0 1",
        "3 4",
        "0 1 0 0 2",
        "0 2 1 1 1",
        "1 2 0 0 2",
        "2 3 0 0 1",
        "2 3 1 1 1",
        "1 4 0 0 2",
        "2 4 1 1 3",
        "3 4 0 0 2",
    ]
    g = create_graph_from_text(g_str)

    # There are three options for the best path, the
    # viterbiPath may return any of them.
    best1 = gtn.Graph()
    best1.add_node(True)
    best1.add_node()
    best1.add_node()
    best1.add_node()
    best1.add_node(False, True)
    best1.add_arc(0, 1, 0, 0, 2)
    best1.add_arc(1, 2, 0, 0, 2)
    best1.add_arc(2, 3, 0, 0, 1)
    best1.add_arc(3, 4, 0, 0, 2)

    best2 = gtn.Graph()
    best2.add_node(True)
    best2.add_node()
    best2.add_node()
    best2.add_node()
    best2.add_node(False, True)
    best2.add_arc(0, 1, 0, 0, 2)
    best2.add_arc(1, 2, 0, 0, 2)
    best2.add_arc(2, 3, 1, 1, 1)
    best2.add_arc(3, 4, 0, 0, 2)

    best3 = gtn.Graph()
    best3.add_node(True)
    best3.add_node()
    best3.add_node()
    best3.add_node(False, True)
    best3.add_arc(0, 1, 0, 0, 2)
    best3.add_arc(1, 2, 0, 0, 2)
    best3.add_arc(2, 3, 1, 1, 3)

    path = gtn.viterbi_path(g)
    self.assertTrue(
        (
            gtn.rand_equivalent(path, best1)
            or gtn.rand_equivalent(path, best2)
            or gtn.rand_equivalent(path, best3)
        )
    )
    self.assertEqual(gtn.viterbi_score(path).item(),
                     gtn.viterbi_score(g).item())