Example #1
    def test(self):
        s0 = '''
            0 1 1 0.1
            0 2 2 0.2
            1 2 3 0.3
            1 3 -1 0.4
            2 3 -1 0.5
            2 1 5 0.55
            3
        '''
        s1 = '''
            0 1 -1 0.6
            1
        '''
        s2 = '''
            0 1 6 0.7
            1 0 7 0.8
            1 0 8 0.9
            1 2 -1 1.0
            2
        '''

        devices = [torch.device('cpu')]
        if torch.cuda.is_available():
            devices.append(torch.device('cuda', 0))

        for device in devices:
            fsa0 = k2.Fsa.from_str(s0)
            fsa1 = k2.Fsa.from_str(s1)
            fsa2 = k2.Fsa.from_str(s2)

            fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2]).to(device)

            fsa = k2.union(fsa_vec)
            assert torch.allclose(
                fsa.arcs.values()[:, :3],
                torch.tensor([
                    [0, 1, 0],  # fsa 0
                    [0, 4, 0],  # fsa 1
                    [0, 5, 0],  # fsa 2
                    # now for fsa0
                    [1, 2, 1],
                    [1, 3, 2],
                    [2, 3, 3],
                    [2, 7, -1],
                    [3, 7, -1],
                    [3, 2, 5],
                    # fsa1
                    [4, 7, -1],
                    # fsa2
                    [5, 6, 6],
                    [6, 5, 7],
                    [6, 5, 8],
                    [6, 7, -1]
                ]).to(torch.int32).to(device))
            assert torch.allclose(
                fsa.scores,
                torch.tensor([
                    0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.55, 0.6, 0.7, 0.8,
                    0.9, 1.0
                ]).to(device))
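
As the expected arcs above show, k2.union adds a new start state 0 with an epsilon arc (label 0, score 0) into each component FSA, shifts the states of the components, and appends one shared final state. A minimal sketch of that remapping on a one-element vector (assuming only that k2 and torch are installed):

import k2

fsa = k2.Fsa.from_str('0 1 -1 0.5\n1')
u = k2.union(k2.create_fsa_vec([fsa]))
# u has an epsilon arc 0 -> 1 with score 0, and the original final arc
# remapped to 1 -> 2 with label -1 and score 0.5.
print(u)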
Example #2
import os

import k2


def lexicon_fst(args):
    '''
    Create lexicon.fst.pdf and lexicon.fst.txt based on args.word_file.

    Input:
        args: an argparse.Namespace
    Returns:
        lexicon: k2.Fsa, the lexicon FST
    Output files:
        lexicon.fst.txt and lexicon.fst.pdf in args.data_directory

    The lexicon FST collapses each run of repeated characters coming out of
    the emission FST into a single output symbol.
    '''
    symbols_str = symboletable(args)  # symboletable() is defined elsewhere in this project
    symbol_pairs = symbols_str.split('\n')
    num_noneps = len(symbol_pairs) - 1
    symbol2fst = [None]  # <eps> has no FST
    for i in range(1, num_noneps + 1):
        # Per-symbol FST: the first arc consumes i and emits i, the
        # self-loop on state 1 absorbs further repeats of i while emitting
        # epsilon (0), and the last arc enters the final state.
        s = '''
        0 1 %d %d 0.0
        1 1 %d 0 0.0
        1 2 -1 -1 0.0
        2
        ''' % (i, i, i)
        g = k2.Fsa.from_str(s, acceptor=False)

        symbol2fst.append(g)
    fst_vec = k2.create_fsa_vec(symbol2fst[1:])
    fst_union = k2.union(fst_vec)
    lexicon = k2.closure(fst_union)
    lexicon.draw(os.path.join(args.data_directory, 'lexicon.fst.pdf'), title='lexicon')
    # lexicon.symbols = k2.SymbolTable.from_str(symbols_str)
    # lexicon.aux_symbols = k2.SymbolTable.from_str(symbols_str)
    with open(os.path.join(args.data_directory, 'lexicon.fst.txt'), 'w') as f:
        f.write(k2.to_str(lexicon))
    return lexicon
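
To see the compression the docstring describes, here is a minimal self-contained sketch; the single-symbol table below is a made-up stand-in for symboletable's output. The closure of the union accepts the input '1 1 1' while emitting just one '1'.

import k2

s = '''
0 1 1 1 0.0
1 1 1 0 0.0
1 2 -1 -1 0.0
2
'''
g = k2.Fsa.from_str(s, acceptor=False)
lexicon = k2.closure(k2.union(k2.create_fsa_vec([g])))
print(k2.to_str_simple(lexicon))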
Example #3
    def test(self):
        s0 = '''
            0 1 1 0.1
            0 2 2 0.2
            1 2 3 0.3
            1 3 -1 0.4
            2 3 -1 0.5
            2 1 5 0.55
            3
        '''
        s1 = '''
            0 1 -1 0.6
            1
        '''
        s2 = '''
            0 1 6 0.7
            1 0 7 0.8
            1 0 8 0.9
            1 2 -1 1.0
            2
        '''

        fsa0 = k2.Fsa.from_str(s0)
        fsa1 = k2.Fsa.from_str(s1)
        fsa2 = k2.Fsa.from_str(s2)

        fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2])

        fsa = k2.union(fsa_vec)
        dot = k2.to_dot(fsa)
        dot.render('/tmp/fsa', format='pdf')
        # the fsa is saved to /tmp/fsa.pdf
        print(fsa)
Example #4
    def test_autograd(self):
        s0 = '''
            0 1 1 0.1
            0 2 2 0.2
            1 3 -1 0.3
            1 2 2 0.4
            2 3 -1 0.5
            3
        '''

        s1 = '''
            0 2 -1 0.6
            0 1 1 0.7
            1 2 -1 0.8
            2
        '''

        s2 = '''
            0 1 1 1.1
            1 2 -1 1.2
            2
        '''
        devices = [torch.device('cpu')]
        if torch.cuda.is_available():
            devices.append(torch.device('cuda', 0))

        for device in devices:
            fsa0 = k2.Fsa.from_str(s0).to(device).requires_grad_(True)
            fsa1 = k2.Fsa.from_str(s1).to(device).requires_grad_(True)
            fsa2 = k2.Fsa.from_str(s2).to(device).requires_grad_(True)

            fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2])
            fsa = k2.union(fsa_vec)
            fsa_vec = k2.create_fsa_vec([fsa])
            log_like = k2.get_tot_scores(fsa_vec,
                                         log_semiring=True,
                                         use_double_scores=False)
            # expected log_like and gradients are computed using gtn.
            # See https://bit.ly/35uVaUv
            log_like.backward()

            expected_log_like = torch.tensor([3.1136]).to(log_like)
            assert torch.allclose(log_like, expected_log_like)

            expected_grad_fsa0 = torch.tensor([
                0.18710044026374817, 0.08949274569749832, 0.06629786640405655,
                0.12080258131027222, 0.21029533445835114
            ]).to(device)

            expected_grad_fsa1 = torch.tensor([
                0.08097638934850693, 0.19916976988315582, 0.19916976988315582
            ]).to(device)

            expected_grad_fsa2 = torch.tensor(
                [0.4432605803012848, 0.4432605803012848]).to(device)

            assert torch.allclose(fsa0.grad, expected_grad_fsa0)
            assert torch.allclose(fsa1.grad, expected_grad_fsa1)
            assert torch.allclose(fsa2.grad, expected_grad_fsa2)
Example #5
import k2
import torch


def _construct_f(fsa_vec: k2.Fsa) -> k2.Fsa:
    num_fsa = fsa_vec.shape[0]
    union = k2.union(fsa_vec)
    # aux_labels must be an int32 tensor; torch.zeros defaults to float32.
    union.aux_labels = torch.zeros(union.num_arcs, dtype=torch.int32)
    # The first num_fsa arcs of the union leave the new start state, one per
    # component FSA; give the i-th such arc aux_label i (1-based) so each
    # branch outputs the index of the FSA it enters.
    union.aux_labels[0:num_fsa] = torch.tensor(list(range(1, 1 + num_fsa)),
                                               dtype=torch.int32)
    union_str = k2.to_str_simple(union)
    states_num = union.shape[0]

    # Rewrite the textual form: add an arc labelled -1 from state 0 to the
    # final state, redirect every arc that entered the final state back to
    # state 0, and clear its -1 label, turning the union into a loop that
    # accepts any sequence of matches of the component FSAs.
    new_str_array = []
    new_str_array.append("0 {} -1 0 0".format(states_num - 1))
    for line in union_str.strip().split("\n"):
        tokens = line.strip().split(" ")
        if len(tokens) == 5:
            tokens[1] = '0' if int(tokens[1]) == states_num - 1 else tokens[1]
            tokens[2] = '0' if int(tokens[2]) == -1 else tokens[2]
        new_str_array.append(" ".join(tokens))
    new_str = "\n".join(new_str_array)

    new_fsa = k2.Fsa.from_str(new_str, num_aux_labels=1)
    # Invert so that the FSA indices move from aux_labels to labels.
    new_fsa_invert = k2.invert(new_fsa)
    return new_fsa_invert
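
A hedged usage sketch (the two toy FSAs are invented for illustration): after the inversion at the end of _construct_f, the indices 1 and 2 become input labels, so the resulting FST maps an index sequence to a concatenation of matches of the corresponding sub-FSAs.

import k2

a = k2.Fsa.from_str('0 1 1 0.0\n1 2 -1 0.0\n2')
b = k2.Fsa.from_str('0 1 2 0.0\n1 2 -1 0.0\n2')
f = _construct_f(k2.create_fsa_vec([a, b]))
print(k2.to_str_simple(f))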
Example #6
    def test(self):
        s0 = '''
            0 1 1 0.1
            0 2 2 0.2
            1 2 3 0.3
            1 3 -1 0.4
            2 3 -1 0.5
            2 1 5 0.55
            3
        '''
        s1 = '''
            0 1 -1 0.6
            1
        '''
        s2 = '''
            0 1 6 0.7
            1 0 7 0.8
            1 0 8 0.9
            1 2 -1 1.0
            2
        '''
        for device in self.devices:
            fsa0 = k2.Fsa.from_str(s0)
            fsa1 = k2.Fsa.from_str(s1)
            fsa2 = k2.Fsa.from_str(s2)

            fsa0.tensor_attr = torch.tensor([1, 2, 3, 4, 5, 6],
                                            dtype=torch.int32,
                                            device=device)
            fsa0.ragged_tensor_attr = k2.RaggedTensor(
                fsa0.tensor_attr.unsqueeze(-1))

            fsa1.tensor_attr = torch.tensor([7],
                                            dtype=torch.int32,
                                            device=device)

            fsa1.ragged_tensor_attr = k2.RaggedTensor(
                fsa1.tensor_attr.unsqueeze(-1))

            fsa2.tensor_attr = torch.tensor([8, 9, 10, 11],
                                            dtype=torch.int32,
                                            device=device)

            fsa2.ragged_tensor_attr = k2.RaggedTensor(
                fsa2.tensor_attr.unsqueeze(-1))

            fsa_vec = k2.create_fsa_vec([fsa0, fsa1, fsa2]).to(device)

            fsa = k2.union(fsa_vec)

            expected_tensor_attr = torch.tensor(
                [0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                 11]).to(fsa.tensor_attr)
            assert torch.all(torch.eq(fsa.tensor_attr, expected_tensor_attr))

            expected_ragged_tensor_attr = k2.RaggedTensor(
                expected_tensor_attr.unsqueeze(-1)).remove_values_eq(0)
            assert str(expected_ragged_tensor_attr) == str(
                fsa.ragged_tensor_attr)

            assert torch.allclose(
                fsa.arcs.values()[:, :3],
                torch.tensor([
                    [0, 1, 0],  # fsa 0
                    [0, 4, 0],  # fsa 1
                    [0, 5, 0],  # fsa 2
                    # now for fsa0
                    [1, 2, 1],
                    [1, 3, 2],
                    [2, 3, 3],
                    [2, 7, -1],
                    [3, 7, -1],
                    [3, 2, 5],
                    # fsa1
                    [4, 7, -1],
                    # fsa2
                    [5, 6, 6],
                    [6, 5, 7],
                    [6, 5, 8],
                    [6, 7, -1]
                ]).to(torch.int32).to(device))
            assert torch.allclose(
                fsa.scores,
                torch.tensor([
                    0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.55, 0.6, 0.7, 0.8,
                    0.9, 1.0
                ]).to(device))