Example #1
0
 def forward_fn(g):
     """Score the union of three Viterbi decodings of ``g``."""
     best_paths = [gtn.viterbi_path(g) for _ in range(3)]
     return gtn.forward_score(gtn.union(best_paths))
Example #2
0
def sample_model(
        num_features, num_classes,
        potentials, transitions,
        num_samples, max_len=20):
    """
    Sample `num_samples` from a linear-chain CRF specified
    by a `potentials` graph and a `transitions` graph. The
    samples will have a random length in `[1, max_len]`.
    """
    model = gtn.compose(potentials, transitions)

    # Each iteration draws one (X, Y) pair, so a plain counted loop
    # suffices:
    samples = []
    for _ in range(num_samples):
        # Draw a random X with a length chosen uniformly from [1, max_len]:
        T = np.random.randint(1, max_len + 1)
        X = make_chain_graph(np.random.randint(0, num_features, size=(T,)))
        # Find the most likely Y given X under the model:
        Y = gtn.project_output(gtn.viterbi_path(gtn.compose(X, model)))
        # Strip the scores so Y carries only the label sequence:
        Y.set_weights(np.zeros(Y.num_arcs()))
        samples.append((X, Y))
    return samples
Example #3
0
    def test_viterbi_path_grad(self):
        """Gradients of the Viterbi path, alone and through a union."""
        graph_text = [
            "0 1",
            "3 4",
            "0 1 0 0 2",
            "0 2 1 1 1",
            "1 2 0 0 2",
            "2 3 0 0 1",
            "2 3 1 1 3",
            "1 4 0 0 2",
            "2 4 1 1 3",
            "3 4 0 0 2",
        ]
        g = create_graph_from_text(graph_text)

        # Arcs on the single best path receive gradient one, all others zero.
        gtn.backward(gtn.viterbi_path(g))
        self.assertEqual(
            g.grad().weights_to_list(),
            [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0],
        )
        g.zero_grad()

        def forward_fn(graph):
            # Union of three identical Viterbi decodings, reduced to a score.
            return gtn.forward_score(
                gtn.union([gtn.viterbi_path(graph) for _ in range(3)]))

        gtn.backward(forward_fn(g))
        self.assertTrue(numerical_grad_check(forward_fn, g, 1e-3, 1e-5))
        def process(b):
            """Decode batch element ``b`` and store its token sequence in
            ``paths[b]``."""
            graph = gtn.linear_graph(T, C, False)
            weights = outputs[b].cpu().contiguous()
            graph.set_weights(weights.data_ptr())
            if self.transitions is None:
                lattice = graph
            else:
                lattice = gtn.intersect(graph, self.transitions)

            # Best path through the lattice, with back-off arcs removed:
            best = gtn.remove(gtn.viterbi_path(lattice))
            # Left compose with the "alignment to token" transducer to map
            # the alignment onto output tokens:
            best = gtn.compose(best, self.tokens)

            # If allow_repeats made the composition ambiguous, keep only the
            # single shortest path:
            best = gtn.viterbi_path(best)
            best = gtn.remove(gtn.project_output(best))
            paths[b] = best.labels_to_list()
Example #5
0
        def pred_seq(batch_index):
            """Decode the best label sequence for one batch element and
            store it in ``best_paths[batch_index]``."""
            scores = arc_scores[batch_index].reshape(num_samples, -1)
            obs_fst = linearFstFromArray(scores)

            # Compose each sequence fst individually: it seems like
            # composition only works for lattices.
            denom_fst = obs_fst
            for seq_fst in seq_fsts:
                denom_fst = gtn.compose(denom_fst, seq_fst)

            best = gtn.project_output(gtn.viterbi_path(denom_fst))
            best_paths[batch_index] = gtn.remove(best)
Example #6
0
    def test_asg_viterbi_path(self):
        """Viterbi decoding through an ASG emission/transition model.

        Test adapted from wav2letter https://tinyurl.com/yc6nxex9
        """
        T = 4
        N = 3

        # fmt: off
        input = [
            0, 0, 7,
            5, 4, 3,
            5, 8, 5,
            5, 4, 3,
        ]
        trans = [
            0, 2, 0,
            0, 0, 2,
            2, 0, 0,
        ]
        expectedPath = [2, 1, 1, 0]
        # fmt: on

        # Start state plus one accepting state per label; each start arc
        # carries label i - 1.
        transitions = gtn.Graph()
        transitions.add_node(True)
        for i in range(1, N + 1):
            transitions.add_node(False, True)
            transitions.add_arc(0, i, i - 1)  # p(i | <s>)

        # Fully-connected label-to-label transitions.
        for i in range(N):
            for j in range(N):
                score = trans[i * N + j]
                transitions.add_arc(j + 1, i + 1, i, i, score)  # p(i | j)

        emissions = emissions_graph(input, T, N, True)

        path = gtn.viterbi_path(gtn.compose(emissions, transitions))

        self.assertEqual(path.labels_to_list(), expectedPath)
Example #7
0
        def process(b):
            """Predict the label sequence for batch element ``b`` and store
            it in ``predictions[b]``."""
            # Emission graph over the network outputs for this example.
            g_emissions = gtn.linear_graph(T, C, False)
            weights = outputs[b].cpu().contiguous()
            g_emissions.set_weights(weights.data_ptr())

            # Transition graph built from the ASG transition parameters.
            g_transitions = utils.ASGLossFunction.create_transitions_graph(
                self.transitions)
            best = gtn.viterbi_path(gtn.intersect(g_emissions,
                                                  g_transitions))
            raw_labels = best.labels_to_list()

            # Collapse runs of repeated labels, optionally drop garbage
            # tokens, then expand replabels back into repetitions.
            collapsed = [label for label, _ in groupby(raw_labels)]
            if self.garbage_idx is not None:
                collapsed = [
                    label for label in collapsed if label != self.garbage_idx
                ]
            predictions[b] = utils.unpack_replabels(collapsed,
                                                    self.num_replabels)
Example #8
0
def main():
    """Train a linear-chain CRF by SGD on data sampled from a ground-truth
    model, then report accuracy on a held-out test set."""
    num_features = 3  # number of input features
    num_classes = 2   # number of output classes
    num_train = 1000  # size of the training set
    num_test = 200    # size of the testing set

    # Setup ground-truth model:
    gt_potentials, gt_transitions = gen_model(num_features, num_classes)

    # Sample training and test datasets:
    samples = sample_model(
        num_features, num_classes,
        gt_potentials, gt_transitions,
        num_train + num_test)
    train, test = samples[:num_train], samples[num_train:]
    print(f"Using {len(train)} samples for the training set")
    print(f"Using {len(test)} samples for the test set")

    # Make the graphs to be learned.  (This was previously done twice with
    # identical arguments, silently discarding the first pair of graphs;
    # the redundant second call has been removed.)
    potentials, transitions = gen_model(
        num_features, num_classes, calc_grad=True, init=False)
    print("Unary potential graph has {} nodes and {} arcs".format(
        potentials.num_nodes(), potentials.num_arcs()))
    print("Transition graph has {} nodes and {} arcs".format(
        transitions.num_nodes(), transitions.num_arcs()))

    # Run the SGD loop:
    learning_rate = 1e-2
    max_iter = 10000
    losses = []
    for it, (X, Y) in enumerate(sampler(train)):
        # Compute the loss and take a gradient step:
        loss = crf_loss(X, Y, potentials, transitions)
        gtn.backward(loss)
        update_params(-learning_rate, potentials, transitions)

        # Clear the gradients:
        transitions.zero_grad()
        potentials.zero_grad()

        losses.append(loss.item())
        if (it + 1) % 1000 == 0:
            print("=" * 50)
            print(f"Iteration {it + 1}, Avg. Loss {np.mean(losses):.3f}")
            losses = []
        # NOTE(review): this breaks *after* iteration `max_iter` has run,
        # i.e. max_iter + 1 updates in total — kept as-is to preserve the
        # original behavior.
        if it == max_iter:
            break

    # Evaluate on the test set:
    correct = 0.0
    total = 0
    for X, Y in test:
        full_graph = gtn.compose(gtn.compose(X, potentials), transitions)
        prediction = gtn.viterbi_path(full_graph).labels_to_list(False)
        correct += np.sum(np.array(Y.labels_to_list()) == prediction)
        total += len(prediction)
    print("Test: Accuracy {:.3f}".format(correct / total))
Example #9
0
def main(out_dir=None,
         gpu_dev_id=None,
         num_samples=10,
         random_seed=None,
         learning_rate=1e-3,
         num_epochs=500,
         dataset_kwargs=None,
         dataloader_kwargs=None,
         model_kwargs=None):
    """Train and evaluate a lattice-CRF model on simulated sequence data.

    Simulates ``num_samples`` label/observation sequences from a fixed
    transition/initial/final parameterization, builds a
    ``libfst.LatticeCrf``, sanity-checks the loss on the training batches
    (drawing diagnostic FSTs and dropping into the debugger when the loss
    is infinite), trains with SGD, and writes figures to
    ``<out_dir>/figures``.

    NOTE(review): ``gpu_dev_id`` and ``random_seed`` are accepted but not
    used in this function body.
    """
    # Mutable-default-argument fix: bind a fresh dict per call instead of
    # sharing a single dict object across all invocations.
    if dataset_kwargs is None:
        dataset_kwargs = {}
    if dataloader_kwargs is None:
        dataloader_kwargs = {}
    if model_kwargs is None:
        model_kwargs = {}

    if out_dir is None:
        out_dir = os.path.join('~', 'data', 'output', 'seqtools', 'test_gtn')

    out_dir = os.path.expanduser(out_dir)

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    vocabulary = ['a', 'b', 'c', 'd', 'e']

    # Ground-truth sequence model used to simulate the dataset.
    transition = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 1],
                           [0, 1, 0, 0, 1], [0, 0, 0, 0, 0]],
                          dtype=float)
    initial = np.array([1, 0, 1, 0, 0], dtype=float)
    final = np.array([0, 1, 0, 0, 1], dtype=float) / 10

    seq_params = (transition, initial, final)
    simulated_dataset = simulate(num_samples, *seq_params)
    label_seqs, obsv_seqs = tuple(zip(*simulated_dataset))
    seq_params = tuple(map(lambda x: -np.log(x), seq_params))

    dataset = torchutils.SequenceDataset(obsv_seqs, label_seqs,
                                         **dataset_kwargs)
    data_loader = torch.utils.data.DataLoader(dataset, **dataloader_kwargs)

    # NOTE(review): train and validation share the same loader, so the
    # validation metrics are computed on the training data.
    train_loader = data_loader
    val_loader = data_loader

    transition_weights = torch.tensor(transition, dtype=torch.float).log()
    initial_weights = torch.tensor(initial, dtype=torch.float).log()
    final_weights = torch.tensor(final, dtype=torch.float).log()

    model = libfst.LatticeCrf(vocabulary,
                              transition_weights=transition_weights,
                              initial_weights=initial_weights,
                              final_weights=final_weights,
                              debug_output_dir=fig_dir,
                              **model_kwargs)

    gtn.draw(model._transition_fst,
             os.path.join(fig_dir, 'transitions-init.png'),
             isymbols=model._arc_symbols,
             osymbols=model._arc_symbols)

    gtn.draw(model._duration_fst,
             os.path.join(fig_dir, 'durations-init.png'),
             isymbols=model._arc_symbols,
             osymbols=model._arc_symbols)

    # Pre-training sanity check on each batch.  (Previously wrapped in a
    # dead `if True:` guard, which has been removed.)
    for i, (inputs, targets, seq_id) in enumerate(train_loader):
        arc_scores = model.scores_to_arc(inputs)
        arc_labels = model.labels_to_arc(targets)

        # NOTE(review): this unpack shadows the `num_samples` parameter
        # with the per-batch sequence length — kept to preserve behavior.
        batch_size, num_samples, num_classes = arc_scores.shape

        obs_fst = libfst.linearFstFromArray(arc_scores[0].reshape(
            num_samples, -1))
        gt_fst = libfst.fromSequence(arc_labels[0])
        d1_fst = gtn.compose(obs_fst, model._duration_fst)
        d1_fst = gtn.project_output(d1_fst)
        denom_fst = gtn.compose(d1_fst, model._transition_fst)
        # denom_fst = gtn.project_output(denom_fst)
        num_fst = gtn.compose(denom_fst, gt_fst)
        viterbi_fst = gtn.viterbi_path(denom_fst)
        pred_fst = gtn.remove(gtn.project_output(viterbi_fst))

        # CRF negative log-likelihood style score:
        # forward(numerator) - forward(denominator).
        loss = gtn.subtract(gtn.forward_score(num_fst),
                            gtn.forward_score(denom_fst))
        loss = torch.tensor(loss.item())

        if torch.isinf(loss).any():
            # Loss is infinite: draw every intermediate FST for inspection
            # and drop into the debugger.  (A dead first assignment to
            # `denom_alt`, immediately overwritten below, was removed.)
            d1_min = gtn.remove(gtn.project_output(d1_fst))
            denom_alt = gtn.compose(d1_min, model._transition_fst)
            num_alt = gtn.compose(denom_alt, gt_fst)
            gtn.draw(obs_fst,
                     os.path.join(fig_dir, 'observations-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(gt_fst,
                     os.path.join(fig_dir, 'labels-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(d1_fst,
                     os.path.join(fig_dir, 'd1-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(d1_min,
                     os.path.join(fig_dir, 'd1-min-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(denom_fst,
                     os.path.join(fig_dir, 'denominator-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(denom_alt,
                     os.path.join(fig_dir, 'denominator-alt-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(num_fst,
                     os.path.join(fig_dir, 'numerator-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(num_alt,
                     os.path.join(fig_dir, 'numerator-alt-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(viterbi_fst,
                     os.path.join(fig_dir, 'viterbi-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            gtn.draw(pred_fst,
                     os.path.join(fig_dir, 'pred-init.png'),
                     isymbols=model._arc_symbols,
                     osymbols=model._arc_symbols)
            # Deliberate interactive breakpoint for debugging bad lattices.
            import pdb
            pdb.set_trace()

    # Train the model
    train_epoch_log = collections.defaultdict(list)
    val_epoch_log = collections.defaultdict(list)
    metric_dict = {
        'Avg Loss': metrics.AverageLoss(),
        'Accuracy': metrics.Accuracy()
    }

    criterion = model.nllLoss
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    # gamma=1.0 keeps the learning rate constant; the scheduler is a no-op.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=1,
                                                gamma=1.00)

    model, last_model_wts = torchutils.trainModel(
        model,
        criterion,
        optimizer,
        scheduler,
        train_loader,
        val_loader,
        metrics=metric_dict,
        test_metric='Avg Loss',
        train_epoch_log=train_epoch_log,
        val_epoch_log=val_epoch_log,
        num_epochs=num_epochs)

    gtn.draw(model._transition_fst,
             os.path.join(fig_dir, 'transitions-trained.png'),
             isymbols=model._arc_symbols,
             osymbols=model._arc_symbols)
    gtn.draw(model._duration_fst,
             os.path.join(fig_dir, 'durations-trained.png'),
             isymbols=model._arc_symbols,
             osymbols=model._arc_symbols)

    torchutils.plotEpochLog(train_epoch_log,
                            title="Train Epoch Log",
                            fn=os.path.join(fig_dir, "train-log.png"))
Example #10
0
    def test_viterbi_path(self):
        """Exercise gtn.viterbi_path on graphs of increasing complexity:
        empty graph, empty string, a simple chain, single node, multiple
        start nodes, multiple accept nodes, and a graph with several
        equally-scoring best paths."""

        g = gtn.Graph()

        # Empty graph gives empty path
        self.assertTrue(gtn.equal(gtn.viterbi_path(g), g))

        # Accepting empty string
        g.add_node(True, True)
        self.assertTrue(gtn.equal(gtn.viterbi_path(g), g))

        # A simple test case
        g = gtn.Graph()
        g.add_node(True)
        g.add_node()
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, 1)
        g.add_arc(0, 1, 1, 1, 2)
        g.add_arc(0, 1, 2, 2, 3)
        g.add_arc(1, 2, 0, 0, 1)
        g.add_arc(1, 2, 1, 1, 2)
        g.add_arc(1, 2, 2, 2, 3)

        # Best path picks the weight-3 arc at each step.
        best = gtn.Graph()
        best.add_node(True)
        best.add_node()
        best.add_node(False, True)
        best.add_arc(0, 1, 2, 2, 3)
        best.add_arc(1, 2, 2, 2, 3)

        path = gtn.viterbi_path(g)
        self.assertTrue(gtn.rand_equivalent(path, best))
        self.assertEqual(gtn.viterbi_score(path).item(), gtn.viterbi_score(g).item())

        # Handle a single node.
        g = gtn.Graph()
        g.add_node(True, True)

        best = gtn.Graph()
        best.add_node(True, True)
        path = gtn.viterbi_path(g)
        self.assertTrue(gtn.rand_equivalent(path, best))
        self.assertEqual(gtn.viterbi_score(path).item(), gtn.viterbi_score(g).item())

        # Handle two start nodes
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, -5)
        g.add_arc(0, 2, 0, 0, 1)
        g.add_arc(1, 2, 0, 0, 2)

        # Best path starts at the second start node (score 2 beats 1 and -3).
        best = gtn.Graph()
        best.add_node(True)
        best.add_node(False, True)
        best.add_arc(0, 1, 0, 0, 2)

        path = gtn.viterbi_path(g)
        self.assertTrue(gtn.rand_equivalent(path, best))
        self.assertEqual(gtn.viterbi_score(path).item(), gtn.viterbi_score(g).item())

        # Handle two accept nodes
        g = gtn.Graph()
        g.add_node(True)
        g.add_node(False, True)
        g.add_node(False, True)
        g.add_arc(0, 1, 0, 0, 3)
        g.add_arc(0, 2, 0, 0, 2)
        g.add_arc(1, 2, 0, 0, 2)

        # Best path passes through both arcs (3 + 2 = 5).
        best = gtn.Graph()
        best.add_node(True)
        best.add_node()
        best.add_node(False, True)
        best.add_arc(0, 1, 0, 0, 3)
        best.add_arc(1, 2, 0, 0, 2)

        path = gtn.viterbi_path(g)
        self.assertTrue(gtn.rand_equivalent(path, best))
        self.assertEqual(gtn.viterbi_score(path).item(), gtn.viterbi_score(g).item())

        # A more complex test case
        g_str = [
            "0 1",
            "3 4",
            "0 1 0 0 2",
            "0 2 1 1 1",
            "1 2 0 0 2",
            "2 3 0 0 1",
            "2 3 1 1 1",
            "1 4 0 0 2",
            "2 4 1 1 3",
            "3 4 0 0 2",
        ]
        g = create_graph_from_text(g_str)

        # There are three options for the best path, the
        # viterbiPath may return any of them.
        best1 = gtn.Graph()
        best1.add_node(True)
        best1.add_node()
        best1.add_node()
        best1.add_node()
        best1.add_node(False, True)
        best1.add_arc(0, 1, 0, 0, 2)
        best1.add_arc(1, 2, 0, 0, 2)
        best1.add_arc(2, 3, 0, 0, 1)
        best1.add_arc(3, 4, 0, 0, 2)

        best2 = gtn.Graph()
        best2.add_node(True)
        best2.add_node()
        best2.add_node()
        best2.add_node()
        best2.add_node(False, True)
        best2.add_arc(0, 1, 0, 0, 2)
        best2.add_arc(1, 2, 0, 0, 2)
        best2.add_arc(2, 3, 1, 1, 1)
        best2.add_arc(3, 4, 0, 0, 2)

        best3 = gtn.Graph()
        best3.add_node(True)
        best3.add_node()
        best3.add_node()
        best3.add_node(False, True)
        best3.add_arc(0, 1, 0, 0, 2)
        best3.add_arc(1, 2, 0, 0, 2)
        best3.add_arc(2, 3, 1, 1, 3)

        path = gtn.viterbi_path(g)
        self.assertTrue(
            (
                gtn.rand_equivalent(path, best1)
                or gtn.rand_equivalent(path, best2)
                or gtn.rand_equivalent(path, best3)
            )
        )

        # All candidate paths score the same, so the score must match either way.
        self.assertEqual(gtn.viterbi_score(path).item(), gtn.viterbi_score(g).item())