Beispiel #1
0
    def test_acceptor_wo_arcs_from_openfst(self):
        """Check ``k2.Fsa.from_openfst`` on texts that contain no arc lines.

        Three texts are parsed: an empty one and two that consist solely of
        two-field lines.  The assertions pin the simple-string form of every
        result, the (src, dst, label) columns of the second FSA and
        ``arcs.dim0()`` of the third one.
        """
        s1 = '''
        '''

        s2 = '''
            0 Inf
            1 0.1
        '''

        s3 = '''
            0 Inf
            1 0.1
            2 0.2
        '''

        # NOTE(review): ``device`` is never referenced in the loop body, so
        # every iteration repeats the same checks on the FSAs as parsed.
        for device in self.devices:
            fsa1 = k2.Fsa.from_openfst(s1)
            self.assertEqual('', k2.to_str_simple(fsa1))

            fsa2 = k2.Fsa.from_openfst(s2)
            self.assertEqual(_remove_leading_spaces(k2.to_str_simple(fsa2)),
                             "1 2 -1 -0.1\n2")
            # Drop the last column of the arc tensor and compare the
            # remaining three columns against the single expected arc.
            arcs2 = fsa2.arcs.values()[:, :-1]
            self.assertTrue(
                torch.all(
                    torch.eq(arcs2,
                             torch.tensor([[1, 2, -1]], dtype=torch.int32))))

            fsa3 = k2.Fsa.from_openfst(s3)
            self.assertEqual(fsa3.arcs.dim0(), 4)
            self.assertEqual(_remove_leading_spaces(k2.to_str_simple(fsa3)),
                             "1 3 -1 -0.1\n2 3 -1 -0.2\n3")
Beispiel #2
0
    def test_simplified(self):
        """Check ``k2.ctc_graph`` for ``[1, 2, 2]`` and ``[1, 2, 3]``.

        On every device the graphs are built twice, once from a
        ``k2.RaggedInt`` and once from a nested Python list (``True`` is
        passed as the second positional argument in both calls).  Both
        results must print the same expected arc listings.
        """
        for device in self.devices:
            s = '''
            [ [1 2 2] [1 2 3] ]
            '''
            graphs_from_ragged = k2.ctc_graph(k2.RaggedInt(s).to(device), True)
            graphs_from_list = k2.ctc_graph([[1, 2, 2], [1, 2, 3]], True,
                                            device)

            expected_listings = (
                '\n'.join([
                    '0 0 0 0 0', '0 1 1 1 0', '1 2 0 0 0', '1 1 1 0 0',
                    '1 3 2 2 0', '2 2 0 0 0', '2 3 2 2 0', '3 4 0 0 0',
                    '3 3 2 0 0', '3 5 2 2 0', '4 4 0 0 0', '4 5 2 2 0',
                    '5 6 0 0 0', '5 5 2 0 0', '5 7 -1 0 0', '6 6 0 0 0',
                    '6 7 -1 0 0', '7'
                ]),
                '\n'.join([
                    '0 0 0 0 0', '0 1 1 1 0', '1 2 0 0 0', '1 1 1 0 0',
                    '1 3 2 2 0', '2 2 0 0 0', '2 3 2 2 0', '3 4 0 0 0',
                    '3 3 2 0 0', '3 5 3 3 0', '4 4 0 0 0', '4 5 3 3 0',
                    '5 6 0 0 0', '5 5 3 0 0', '5 7 -1 0 0', '6 6 0 0 0',
                    '6 7 -1 0 0', '7'
                ]),
            )

            # Checked in this order: list-built graphs, then ragged-built.
            for graphs in (graphs_from_list, graphs_from_ragged):
                for idx, wanted in enumerate(expected_listings):
                    printed = k2.to_str_simple(graphs[idx].to('cpu'))
                    assert printed.strip() == wanted
Beispiel #3
0
    def test_acceptor_from_openfst(self):
        """Parse one acceptor text through four ``from_openfst`` call forms.

        The text is parsed with no extra argument, with ``acceptor=True``,
        with ``num_aux_labels=0`` and with ``aux_label_names=[]``.  The same
        assertions on the printed form, the arc tensor and the scores are
        applied to each result.
        """
        s = '''
            0 1  2 -1.2
            0 2  10 -2.2
            1 6  1  -3.2
            1 3  3  -4.2
            2 6  2  -5.2
            2 4  2  -6.2
            3 6  3  -7.2
            5 0  1  -8.2
            7
            6 -9.2
        '''

        call_variants = (
            {},
            {'acceptor': True},
            {'num_aux_labels': 0},
            {'aux_label_names': []},
        )
        for extra_kwargs in call_variants:
            fsa = k2.Fsa.from_openfst(s, **extra_kwargs)

            expected_str = '''
            0 1 2 -1.2
            0 2 10 -2.2
            1 6 1 -3.2
            1 3 3 -4.2
            2 6 2 -5.2
            2 4 2 -6.2
            3 6 3 -7.2
            5 0 1 -8.2
            6 8 -1 -9.2
            7 8 -1 0
            8
            '''
            wanted = _remove_leading_spaces(expected_str)
            printed = _remove_leading_spaces(
                k2.to_str_simple(fsa, openfst=True))
            assert wanted == printed

            # All columns of the arc tensor except the last one.
            arcs = fsa.arcs.values()[:, :-1]
            assert isinstance(arcs, torch.Tensor)
            assert arcs.dtype == torch.int32
            assert arcs.device.type == 'cpu'
            assert arcs.shape == (10, 3), 'there should be 10 arcs'
            first_arc = torch.tensor([0, 1, 2], dtype=torch.int32)
            assert torch.all(torch.eq(arcs[0], first_arc))

            # The stored scores are expected with the opposite sign of the
            # numbers written in the text.
            scores_before = torch.tensor(
                [1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 0],
                dtype=torch.float32)
            assert torch.allclose(fsa.scores, scores_before)

            # In-place update through the ``scores`` attribute.
            fsa.scores *= -1

            scores_after = torch.tensor(
                [-1.2, -2.2, -3.2, -4.2, -5.2, -6.2, -7.2, -8.2, -9.2, 0],
                dtype=torch.float32)
            assert torch.allclose(fsa.scores, scores_after)
    def test(self):
        """Check ``k2.levenshtein_graph`` for several ``ins_del_score`` values.

        For every device and score the graphs are built from a
        ``k2.RaggedTensor`` and from a nested list holding ``[1, 2, 3]``,
        ``[]`` and ``[4, 5, 6]``.  Both results must print the expected arc
        listings and carry the expected
        ``__ins_del_score_offset_internal_attr_`` tensor.
        """
        for device in self.devices:
            for score in [-0.5, -0.501, -0.502]:
                s = '''
                [ [1 2 3] [ ] [4 5 6] ]
                '''
                graphs_from_ragged = k2.levenshtein_graph(
                    k2.RaggedTensor(s).to(device), ins_del_score=score)
                graphs_from_list = k2.levenshtein_graph(
                    [[1, 2, 3], [], [4, 5, 6]],
                    device=device,
                    ins_del_score=score)

                expected_listings = (
                    '\n'.join([
                        f'0 0 0 0 {score}', '0 1 0 1 -0.5', '0 1 1 1 0',
                        f'1 1 0 0 {score}', '1 2 0 2 -0.5', '1 2 2 2 0',
                        f'2 2 0 0 {score}', '2 3 0 3 -0.5', '2 3 3 3 0',
                        f'3 3 0 0 {score}', '3 4 -1 -1 0', '4'
                    ]),
                    '\n'.join([f'0 0 0 0 {score}', '0 1 -1 -1 0', '1']),
                    '\n'.join([
                        f'0 0 0 0 {score}', '0 1 0 4 -0.5', '0 1 4 4 0',
                        f'1 1 0 0 {score}', '1 2 0 5 -0.5', '1 2 5 5 0',
                        f'2 2 0 0 {score}', '2 3 0 6 -0.5', '2 3 6 6 0',
                        f'3 3 0 0 {score}', '3 4 -1 -1 0', '4'
                    ]),
                )

                # Checked in this order: list-built graphs, then
                # ragged-built ones.
                for graphs in (graphs_from_list, graphs_from_ragged):
                    for idx, wanted in enumerate(expected_listings):
                        printed = k2.to_str_simple(graphs[idx].to('cpu'))
                        assert printed.strip() == wanted

                # Expected per-arc offsets: ``delta`` on the arcs whose
                # printed score is ``score`` above, 0 on all other arcs.
                delta = score - (-0.5)
                three_symbols = [delta, 0, 0] * 3 + [delta, 0]
                no_symbols = [delta, 0]
                expected_offset = torch.tensor(
                    three_symbols + no_symbols + three_symbols,
                    dtype=torch.float32)

                # ``getattr`` with a string is used for the attribute lookup.
                attr_name = "__ins_del_score_offset_internal_attr_"
                offset_ragged = getattr(graphs_from_ragged, attr_name)
                offset_ragged = offset_ragged.to('cpu')
                offset_list = getattr(graphs_from_list, attr_name).to('cpu')
                assert torch.allclose(expected_offset, offset_ragged)
                assert torch.allclose(expected_offset, offset_list)
Beispiel #5
0
    def test_treat_epsilon_specially_true(self):
        """Intersect two arc-sorted acceptors with default epsilon handling.

        Checks the printed result, the gradients that reach both inputs
        after back-propagating the negated score sum, and that passing an
        FsaVec as one input yields a result whose ``shape`` has length 3.
        """
        # this version works only on CPU and requires
        # arc-sorted inputs
        # a_fsa recognizes `(1|3)?2*`
        s1 = '''
            0 1 3 0.0
            0 1 1 0.2
            0 1 0 0.1
            1 1 2 0.3
            1 2 -1 0.4
            2
        '''
        a_fsa = k2.Fsa.from_str(s1).requires_grad_(True)

        # b_fsa recognizes `1|2|5`
        s2 = '''
            0 1 5 0
            0 1 1 1
            0 1 2 2
            1 2 -1 3
            2
        '''
        b_fsa = k2.Fsa.from_str(s2).requires_grad_(True)

        # the intersection recognizes 1|2
        sorted_a = k2.arc_sort(a_fsa)
        sorted_b = k2.arc_sort(b_fsa)
        fsa = k2.intersect(sorted_a, sorted_b)
        assert len(fsa.shape) == 2
        expected_str = '\n'.join(
            ['0 1 0 0.1', '0 2 1 1.2', '1 2 2 2.3', '2 3 -1 3.4', '3'])
        assert k2.to_str_simple(fsa).strip() == expected_str

        (-fsa.scores.sum()).backward()
        # arc 1, 2, 3, and 4 of a_fsa are kept in the final intersected FSA
        expected_a_grad = torch.tensor([0, -1, -1, -1, -1]).to(a_fsa.grad)
        assert torch.allclose(a_fsa.grad, expected_a_grad)

        # arc 1, 2, and 3 of b_fsa are kept in the final intersected FSA
        expected_b_grad = torch.tensor([0, -1, -1, -1]).to(b_fsa.grad)
        assert torch.allclose(b_fsa.grad, expected_b_grad)

        # if any of the input FSAs is an FsaVec,
        # the output FSA is also an FsaVec.
        a_fsa.scores.grad = None
        b_fsa.scores.grad = None
        a_fsa = k2.create_fsa_vec([a_fsa])
        fsa = k2.intersect(k2.arc_sort(a_fsa), k2.arc_sort(b_fsa))
        assert len(fsa.shape) == 3
Beispiel #6
0
    def test_treat_epsilon_specially_false(self):
        """Intersect two acceptors with ``treat_epsilons_specially=False``.

        Runs on CPU, and additionally on CUDA when both
        ``torch.cuda.is_available()`` and ``k2.with_cuda`` are truthy.
        Checks the printed result, the gradients of both inputs and the
        FsaVec case.
        """
        cuda_usable = torch.cuda.is_available() and k2.with_cuda
        devices = [torch.device('cpu')]
        if cuda_usable:
            devices += [torch.device('cuda')]

        for device in devices:
            # a_fsa recognizes `(0|1)2*`
            s1 = '''
                0 1 0 0.1
                0 1 1 0.2
                1 1 2 0.3
                1 2 -1 0.4
                2
            '''
            a_fsa = k2.Fsa.from_str(s1).to(device).requires_grad_(True)

            # b_fsa recognizes `1|2`
            s2 = '''
                0 1 1 1
                0 1 2 2
                1 2 -1 3
                2
            '''
            b_fsa = k2.Fsa.from_str(s2).to(device).requires_grad_(True)

            # the intersection recognizes `1`
            fsa = k2.intersect(a_fsa, b_fsa, treat_epsilons_specially=False)
            assert len(fsa.shape) == 2
            expected_str = '\n'.join(['0 1 1 1.2', '1 2 -1 3.4', '2'])
            assert k2.to_str_simple(fsa).strip() == expected_str

            (-fsa.scores.sum()).backward()
            # arc 1 and 3 of a_fsa are kept in the final intersected FSA
            expected_a_grad = torch.tensor([0, -1, 0, -1]).to(a_fsa.grad)
            assert torch.allclose(a_fsa.grad, expected_a_grad)

            # arc 0 and 2 of b_fsa are kept in the final intersected FSA
            expected_b_grad = torch.tensor([-1, 0, -1]).to(b_fsa.grad)
            assert torch.allclose(b_fsa.grad, expected_b_grad)

            # if any of the input FSAs is an FsaVec,
            # the output FSA is also an FsaVec.
            a_fsa.scores.grad = None
            b_fsa.scores.grad = None
            a_fsa = k2.create_fsa_vec([a_fsa])
            fsa = k2.intersect(a_fsa, b_fsa, treat_epsilons_specially=False)
            assert len(fsa.shape) == 3
Beispiel #7
0
    def test_transducer2_from_str(self):
        """Parse a text with two auxiliary label columns via ``from_str``.

        The text is parsed once with ``num_aux_labels=2`` and once with
        ``aux_label_names=['aux_labels', 'aux_labels2']``.  Both results
        must expose the same ``aux_labels``, ``aux_labels2`` and ``scores``.
        """
        s = '''
            0 1  2  22  101 -1.2
            0 2  10 100 102 -2.2
            1 6 -1  16  103 -4.2
            1 3  3  33  104 -3.2
            2 6 -1  26  105 -5.2
            2 4  2  22  106 -6.2
            3 6 -1  36  107 -7.2
            5 0  1  50  108 -8.2
            6
        '''
        call_variants = (
            {'num_aux_labels': 2},
            {'aux_label_names': ['aux_labels', 'aux_labels2']},
        )
        for extra_kwargs in call_variants:
            fsa = k2.Fsa.from_str(s, **extra_kwargs)
            assert fsa.aux_labels.dtype == torch.int32
            assert fsa.aux_labels.device.type == 'cpu'

            first_aux = torch.tensor([22, 100, 16, 33, 26, 22, 36, 50],
                                     dtype=torch.int32)
            assert torch.all(torch.eq(fsa.aux_labels, first_aux))

            second_aux = torch.tensor(
                [101, 102, 103, 104, 105, 106, 107, 108], dtype=torch.int32)
            assert torch.all(torch.eq(fsa.aux_labels2, second_aux))

            wanted_scores = torch.tensor(
                [-1.2, -2.2, -4.2, -3.2, -5.2, -6.2, -7.2, -8.2],
                dtype=torch.float32)
            assert torch.allclose(fsa.scores, wanted_scores)

            # only aux_labels will be printed right now..
            expected_str = '''
            0 1 2 22 -1.2
            0 2 10 100 -2.2
            1 6 -1 16 -4.2
            1 3 3 33 -3.2
            2 6 -1 26 -5.2
            2 4 2 22 -6.2
            3 6 -1 36 -7.2
            5 0 1 50 -8.2
            6
            '''
            wanted = _remove_leading_spaces(expected_str)
            printed = _remove_leading_spaces(k2.to_str_simple(fsa))
            assert wanted == printed
Beispiel #8
0
    def test_transducer_from_openfst(self):
        """Parse a transducer text through three ``from_openfst`` call forms.

        The text is parsed with ``acceptor=False``, with ``num_aux_labels=1``
        and with ``aux_label_names=['aux_labels']``.  The aux labels, the
        scores and the OpenFst-style printed form are checked on each
        result.
        """
        s = '''
            0 1 2 22  -1.2
            0 2  10 100 -2.2
            1 6  1  16  -4.2
            1 3  3  33  -3.2
            2 6  2  26  -5.2
            2 4  2  22  -6.2
            3 6  3  36  -7.2
            5 0  1  50  -8.2
            7 -9.2
            6
        '''
        call_variants = (
            {'acceptor': False},
            {'num_aux_labels': 1},
            {'aux_label_names': ['aux_labels']},
        )
        for extra_kwargs in call_variants:
            fsa = k2.Fsa.from_openfst(s, **extra_kwargs)

            assert fsa.aux_labels.dtype == torch.int32
            assert fsa.aux_labels.device.type == 'cpu'
            # Ten entries are expected; the last two are -1.
            wanted_aux = torch.tensor(
                [22, 100, 16, 33, 26, 22, 36, 50, -1, -1], dtype=torch.int32)
            assert torch.all(torch.eq(fsa.aux_labels, wanted_aux))

            wanted_scores = torch.tensor(
                [1.2, 2.2, 4.2, 3.2, 5.2, 6.2, 7.2, 8.2, 0, 9.2],
                dtype=torch.float32)
            assert torch.allclose(fsa.scores, wanted_scores)

            expected_str = '''
                0 1 2 22 -1.2
                0 2 10 100 -2.2
                1 6 1 16 -4.2
                1 3 3 33 -3.2
                2 6 2 26 -5.2
                2 4 2 22 -6.2
                3 6 3 36 -7.2
                5 0 1 50 -8.2
                6 8 -1 -1 0
                7 8 -1 -1 -9.2
                8
            '''
            wanted = _remove_leading_spaces(expected_str)
            printed = _remove_leading_spaces(
                k2.to_str_simple(fsa, openfst=True))
            assert wanted == printed
Beispiel #9
0
    def test_acceptor_from_tensor(self):
        """Build an acceptor from a raw int32 arc tensor and inspect it.

        Each row holds three integers followed by the score converted with
        ``_k2.float_as_int``.  The test checks the printed form, the first
        three arc columns, and that ``scores`` can be updated in place.
        """
        arc_specs = [
            (0, 1, 2, -1.2),
            (0, 2, 10, -2.2),
            (1, 6, -1, -3.2),
            (1, 3, 3, -4.2),
            (2, 6, -1, -5.2),
            (2, 4, 2, -6.2),
            (3, 6, -1, -7.2),
            (5, 0, 1, -8.2),
        ]
        rows = [[src, dst, label, _k2.float_as_int(score)]
                for src, dst, label, score in arc_specs]
        fsa = k2.Fsa(torch.tensor(rows, dtype=torch.int32))

        expected_str = '''
            0 1 2 -1.2
            0 2 10 -2.2
            1 6 -1 -3.2
            1 3 3 -4.2
            2 6 -1 -5.2
            2 4 2 -6.2
            3 6 -1 -7.2
            5 0 1 -8.2
            6
        '''
        wanted = _remove_leading_spaces(expected_str)
        printed = _remove_leading_spaces(k2.to_str_simple(fsa))
        assert wanted == printed

        # All columns of the arc tensor except the last one.
        arcs = fsa.arcs.values()[:, :-1]
        assert isinstance(arcs, torch.Tensor)
        assert arcs.dtype == torch.int32
        assert arcs.device.type == 'cpu'
        assert arcs.shape == (8, 3), 'there should be 8 arcs'
        first_arc = torch.tensor([0, 1, 2], dtype=torch.int32)
        assert torch.all(torch.eq(arcs[0], first_arc))

        scores_before = torch.tensor(
            [-1.2, -2.2, -3.2, -4.2, -5.2, -6.2, -7.2, -8.2],
            dtype=torch.float32)
        assert torch.allclose(fsa.scores, scores_before)

        # In-place update through the ``scores`` attribute.
        fsa.scores *= -1

        scores_after = torch.tensor([1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2],
                                    dtype=torch.float32)
        assert torch.allclose(fsa.scores, scores_after)
Beispiel #10
0
    def test_transducer_from_str(self):
        """Parse a transducer text through three ``from_str`` call forms.

        The text is parsed with ``num_aux_labels=1``, with ``acceptor=False``
        and with ``aux_label_names=['aux_labels']``.  Every result must have
        the same aux labels, scores and printed form.
        """
        s = '''
            0 1  2  22  -1.2
            0 2  10 100 -2.2
            1 6 -1  16  -4.2
            1 3  3  33  -3.2
            2 6 -1  26  -5.2
            2 4  2  22  -6.2
            3 6 -1  36  -7.2
            5 0  1  50  -8.2
            6
        '''
        call_variants = (
            {'num_aux_labels': 1},
            {'acceptor': False},
            {'aux_label_names': ['aux_labels']},
        )
        for extra_kwargs in call_variants:
            fsa = k2.Fsa.from_str(s, **extra_kwargs)
            assert fsa.aux_labels.dtype == torch.int32
            assert fsa.aux_labels.device.type == 'cpu'

            wanted_aux = torch.tensor([22, 100, 16, 33, 26, 22, 36, 50],
                                      dtype=torch.int32)
            assert torch.all(torch.eq(fsa.aux_labels, wanted_aux))

            wanted_scores = torch.tensor(
                [-1.2, -2.2, -4.2, -3.2, -5.2, -6.2, -7.2, -8.2],
                dtype=torch.float32)
            assert torch.allclose(fsa.scores, wanted_scores)

            expected_str = '''
            0 1 2 22 -1.2
            0 2 10 100 -2.2
            1 6 -1 16 -4.2
            1 3 3 33 -3.2
            2 6 -1 26 -5.2
            2 4 2 22 -6.2
            3 6 -1 36 -7.2
            5 0 1 50 -8.2
            6
            '''
            wanted = _remove_leading_spaces(expected_str)
            printed = _remove_leading_spaces(k2.to_str_simple(fsa))
            assert wanted == printed
Beispiel #11
0
    def test(self):
        """Check ``k2.connect`` and the gradient it passes back to its input.

        The expected output keeps two of the four input arcs.  After
        back-propagating the sum of the output scores, the input gradient
        must be 1 for the first and third arc and 0 for the other two.
        """
        s = '''
            0 1 1 0.1
            0 2 2 0.2
            1 4 -1 0.3
            3 4 -1 0.4
            4
        '''
        fsa = k2.Fsa.from_str(s)
        fsa.requires_grad_(True)

        connected_fsa = k2.connect(fsa)
        wanted = '\n'.join(['0 1 1 0.1', '1 2 -1 0.3', '2'])
        assert k2.to_str_simple(connected_fsa).strip() == wanted

        connected_fsa.scores.sum().backward()
        wanted_grad = torch.tensor([1, 0, 1, 0], dtype=torch.float32)
        assert torch.allclose(fsa.scores.grad, wanted_grad)
Beispiel #12
0
    def test(self):
        """Check ``k2.arc_sort`` and the gradient it passes back to its input.

        The expected output swaps the first two arcs.  The loss is the mean
        of output scores 1 and 2, so the input gradient must be 0.5 on
        input arcs 0 and 2 and 0 on arc 1.
        """
        s = '''
            0 1 2 0.1
            0 1 1 0.2
            1 2 -1 0.3
            2
        '''
        fsa = k2.Fsa.from_str(s)
        fsa.requires_grad_(True)
        sorted_fsa = k2.arc_sort(fsa)

        wanted = '\n'.join(['0 1 1 0.2', '0 1 2 0.1', '1 2 -1 0.3', '2'])
        assert k2.to_str_simple(sorted_fsa).strip() == wanted

        loss = (sorted_fsa.scores[1] + sorted_fsa.scores[2]) / 2
        loss.backward()
        wanted_grad = torch.tensor([0.5, 0, 0.5], dtype=torch.float32)
        assert torch.allclose(fsa.scores.grad, wanted_grad)
Beispiel #13
0
    def test_transducer_from_tensor(self):
        """Build a transducer from an arc tensor plus an aux-label tensor.

        On every device the FSA is constructed from an int32 arc tensor,
        whose last column is the score converted with ``_k2.float_as_int``,
        and a separate int32 aux-label tensor.  The aux labels, the scores
        and the printed form are checked.
        """
        arc_specs = [
            (0, 1, 2, -1.2),
            (0, 2, 10, -2.2),
            (1, 6, -1, -4.2),
            (1, 3, 3, -3.2),
            (2, 6, -1, -5.2),
            (2, 4, 2, -6.2),
            (3, 6, -1, -7.2),
            (5, 0, 1, -8.2),
        ]
        aux_values = [22, 100, 16, 33, 26, 22, 36, 50]

        for device in self.devices:
            rows = [[src, dst, label, _k2.float_as_int(score)]
                    for src, dst, label, score in arc_specs]
            fsa_tensor = torch.tensor(rows, dtype=torch.int32).to(device)
            aux_labels_tensor = torch.tensor(aux_values,
                                             dtype=torch.int32).to(device)
            fsa = k2.Fsa(fsa_tensor, aux_labels_tensor)

            assert fsa.aux_labels.dtype == torch.int32
            assert fsa.aux_labels.device.type == device.type
            wanted_aux = torch.tensor(aux_values,
                                      dtype=torch.int32).to(device)
            assert torch.all(torch.eq(fsa.aux_labels, wanted_aux))

            wanted_scores = torch.tensor(
                [-1.2, -2.2, -4.2, -3.2, -5.2, -6.2, -7.2, -8.2],
                dtype=torch.float32,
                device=device)
            assert torch.allclose(fsa.scores, wanted_scores)

            expected_str = '''
                0 1 2 22 -1.2
                0 2 10 100 -2.2
                1 6 -1 16 -4.2
                1 3 3 33 -3.2
                2 6 -1 26 -5.2
                2 4 2 22 -6.2
                3 6 -1 36 -7.2
                5 0 1 50 -8.2
                6
            '''
            wanted = _remove_leading_spaces(expected_str)
            printed = _remove_leading_spaces(k2.to_str_simple(fsa))
            assert wanted == printed
Beispiel #14
0
    def test_acceptor_wo_arcs_from_str(self):
        """Check ``k2.Fsa.from_str`` on texts that contain no arc lines.

        An empty text must print as ``''``, a text with the two lines ``0``
        and ``1`` must raise ``ValueError``, and a text with the single line
        ``1`` must give ``arcs.dim0() == 2``.
        """
        s1 = '''
        '''

        s2 = '''
            0
            1
        '''

        s3 = '''
            1
        '''

        # NOTE(review): ``device`` is not referenced in the loop body, so
        # each iteration repeats the same checks.
        for device in self.devices:
            empty_fsa = k2.Fsa.from_str(s1)
            self.assertEqual(k2.to_str_simple(empty_fsa), '')

            self.assertRaises(ValueError, k2.Fsa.from_str, s2)

            single_line_fsa = k2.Fsa.from_str(s3)
            self.assertEqual(single_line_fsa.arcs.dim0(), 2)
Beispiel #15
0
def _construct_f(fsa_vec: k2.Fsa) -> k2.Fsa:
    """Build a new FSA from the union of ``fsa_vec`` by rewriting its text.

    Steps, as implemented below:

    1. Take ``k2.union(fsa_vec)`` and attach aux labels: the first
       ``fsa_vec.shape[0]`` arcs get ``1, 2, ...``, every other arc gets 0.
    2. Print the union, prepend the line ``"0 <last> -1 0 0"`` where
       ``<last>`` is ``union.shape[0] - 1``, and rewrite each 5-token line:
       a second token equal to ``<last>`` becomes ``0`` and a third token
       equal to ``-1`` becomes ``0``.  Other lines are copied unchanged.
    3. Parse the rewritten text with ``num_aux_labels=1`` and invert it.

    Args:
      fsa_vec: The FSAs to combine; ``shape[0]`` is used as their count.

    Returns:
      The result of ``k2.invert`` applied to the re-parsed FSA.
    """
    num_fsa = fsa_vec.shape[0]
    union = k2.union(fsa_vec)

    # Tensor aux labels are int32 in k2, so build them as int32 instead of
    # relying on the float32 default of ``torch.zeros``.
    aux_labels = torch.zeros(union.num_arcs, dtype=torch.int32)
    aux_labels[:num_fsa] = torch.arange(1, 1 + num_fsa, dtype=torch.int32)
    union.aux_labels = aux_labels

    last_state = union.shape[0] - 1
    rewritten = [f"0 {last_state} -1 0 0"]
    for line in k2.to_str_simple(union).strip().split("\n"):
        tokens = line.strip().split(" ")
        if len(tokens) == 5:
            # Only 5-token lines are rewritten; tokens[1] and tokens[2] are
            # the fields compared against ``last_state`` and -1.
            if int(tokens[1]) == last_state:
                tokens[1] = '0'
            if int(tokens[2]) == -1:
                tokens[2] = '0'
        rewritten.append(" ".join(tokens))

    new_fsa = k2.Fsa.from_str("\n".join(rewritten), num_aux_labels=1)
    return k2.invert(new_fsa)
Beispiel #16
0
    def test(self):
        """Check ``k2.intersect_device`` for both map kinds and sort flags.

        One ``a`` FSA is intersected with two ``b`` FSAs.  With the identity
        map the ``a`` side holds two copies and ``b_to_a_map`` is ``[0, 1]``;
        otherwise it holds one copy and the map is ``[0, 0]``.  The printed
        results (after ``k2.connect`` on CPU) and the gradients of all three
        inputs are checked.
        """
        flag_combinations = [
            (True, True),
            (False, True),
            (True, False),
            (False, False),
        ]
        for device in self.devices:
            for use_identity_map, sorted_match_a in flag_combinations:
                # recognizes (0|1)(0|2)
                s1 = '''
                    0 1 0 0.1
                    0 1 1 0.2
                    1 2 0 0.4
                    1 2 2 0.3
                    2 3 -1 0.5
                    3
                '''

                # recognizes 02*
                s2 = '''
                    0 1 0 1
                    1 1 2 2
                    1 2 -1 3
                    2
                '''

                # recognizes 1*0
                s3 = '''
                    0 0 1 10
                    0 1 0 20
                    1 2 -1 30
                    2
                '''
                a_fsa = k2.Fsa.from_str(s1).to(device)
                b_fsa_1 = k2.Fsa.from_str(s2).to(device)
                b_fsa_2 = k2.Fsa.from_str(s3).to(device)

                for leaf in (a_fsa, b_fsa_1, b_fsa_2):
                    leaf.requires_grad_(True)

                b_fsas = k2.create_fsa_vec([b_fsa_1, b_fsa_2])
                if use_identity_map:
                    a_members, mapping = [a_fsa, a_fsa], [0, 1]
                else:
                    a_members, mapping = [a_fsa], [0, 0]
                a_fsas = k2.create_fsa_vec(a_members)
                b_to_a_map = torch.tensor(mapping,
                                          dtype=torch.int32).to(device)

                c_fsas = k2.intersect_device(a_fsas, b_fsas, b_to_a_map,
                                             sorted_match_a)
                assert c_fsas.shape == (2, None, None)
                c_fsas = k2.connect(c_fsas.to('cpu'))
                # c_fsas[0] recognizes: 02
                # c_fsas[1] recognizes: 10

                wanted_0 = '\n'.join(
                    ['0 1 0 1.1', '1 2 2 2.3', '2 3 -1 3.5', '3'])
                assert k2.to_str_simple(c_fsas[0]).strip() == wanted_0

                wanted_1 = '\n'.join(
                    ['0 1 1 10.2', '1 2 0 20.4', '2 3 -1 30.5', '3'])
                assert k2.to_str_simple(c_fsas[1]).strip() == wanted_1

                (-c_fsas.scores.sum()).backward()
                # The last arc of a_fsa is expected to collect -2, all
                # other arcs -1.
                wanted_a_grad = torch.tensor([-1, -1, -1, -1,
                                              -2]).to(a_fsa.grad)
                assert torch.allclose(a_fsa.grad, wanted_a_grad)
                for b_fsa in (b_fsa_1, b_fsa_2):
                    wanted_b_grad = torch.tensor([-1, -1, -1]).to(b_fsa.grad)
                    assert torch.allclose(b_fsa.grad, wanted_b_grad)
    def test_single_fsa(self):
        """Check ``k2.remove_epsilon_self_loops`` on a single FSA.

        String, int, float and ragged attributes are attached to the input.
        The output must keep input arcs 0, 1, 4 and 5, carry the matching
        attribute rows, and back-propagate gradients to the input's
        ``float_attr`` and ``scores`` exactly as plain indexing of
        independent copies does.
        """
        # Input arcs expected to remain in the output, in output order.
        kept_arcs = (0, 1, 4, 5)

        for device in self.devices:
            # See https://git.io/JY7r4
            s = '''
                0 1 0 0.1
                0 2 0 0.2
                0 0 0 0.3
                1 1 0 0.4
                1 2 0 0.5
                2 3 -1 0.6
                3
            '''
            src = k2.Fsa.from_str(s).to(device).requires_grad_(True)
            # Independent leaf copy used to compute the reference gradient.
            scores_copy = src.scores.detach().clone().requires_grad_(True)

            src.attr1 = "hello"
            src.attr2 = "k2"
            float_attr = torch.tensor([0.1, 0.2, 0.3, 4, 5, 6],
                                      dtype=torch.float32,
                                      requires_grad=True,
                                      device=device)

            # The FSA gets its own leaf; ``float_attr`` stays the reference.
            src.float_attr = float_attr.detach().clone().requires_grad_(True)
            src.int_attr = torch.tensor([1, 2, 3, 4, 5, 6],
                                        dtype=torch.int32,
                                        device=device)
            src.ragged_attr = k2.RaggedTensor(
                [[10, 20], [30, 40, 50], [60, 70], [80], [], [0]]).to(device)

            dest = k2.remove_epsilon_self_loops(src)
            # arc map is [0, 1, 4, 5]

            # See https://git.io/JY7oC
            expected_fsa = k2.Fsa.from_str('''
                0 1 0 0.1
                0 2 0 0.2
                1 2 0 0.5
                2 3 -1 0.6
                3
            ''')
            assert k2.to_str_simple(dest) == k2.to_str_simple(
                expected_fsa), f'{str(dest)}\n{str(expected_fsa)}'

            assert dest.attr1 == src.attr1
            assert dest.attr2 == src.attr2

            expected_int_attr = torch.tensor([1, 2, 5, 6],
                                             dtype=torch.int32,
                                             device=device)
            assert torch.all(torch.eq(dest.int_attr, expected_int_attr))

            expected_ragged_attr = k2.RaggedTensor(
                [[10, 20], [30, 40, 50], [], [0]]).to(device)
            assert dest.ragged_attr == expected_ragged_attr

            # Element-wise copies keep the reference tensors in the autograd
            # graph of the expected values.
            expected_float_attr = torch.empty_like(dest.float_attr)
            for new_idx, old_idx in enumerate(kept_arcs):
                expected_float_attr[new_idx] = float_attr[old_idx]
            assert torch.all(torch.eq(dest.float_attr, expected_float_attr))

            expected_scores = torch.empty_like(dest.scores)
            for new_idx, old_idx in enumerate(kept_arcs):
                expected_scores[new_idx] = scores_copy[old_idx]
            assert torch.all(torch.eq(dest.scores, expected_scores))

            scale = torch.tensor([10, 20, 30, 40]).to(float_attr)

            (dest.float_attr * scale).sum().backward()
            (expected_float_attr * scale).sum().backward()
            assert torch.all(torch.eq(src.float_attr.grad, float_attr.grad))

            (dest.scores * scale).sum().backward()
            (expected_scores * scale).sum().backward()
            assert torch.all(torch.eq(src.scores.grad, scores_copy.grad))
    def test_fsa_vec(self):
        """Check ``k2.remove_epsilon_self_loops`` on an FsaVec of two FSAs.

        Both FSAs are parsed from the same text but carry different
        attributes.  The output must keep arcs 0, 1, 4 and 5 of each FSA,
        carry the matching attribute rows, and back-propagate gradients to
        each input's ``float_attr`` and ``scores`` exactly as plain indexing
        of independent reference tensors does.
        """
        # Per-FSA input arcs expected to remain in the output.
        kept_arcs = (0, 1, 4, 5)

        for device in self.devices:
            # See https://git.io/JY7r4
            s = '''
                0 1 0 0.1
                0 2 0 0.2
                0 0 0 0.3
                1 1 0 0.4
                1 2 0 0.5
                2 3 -1 0.6
                3
            '''
            fsa1 = k2.Fsa.from_str(s).to(device).requires_grad_(True)
            scores_copy1 = fsa1.scores.detach().clone().requires_grad_(True)
            fsa1.attr1 = "hello"
            float_attr1 = torch.tensor([0.1, 0.2, 0.3, 4, 5, 6],
                                       dtype=torch.float32,
                                       requires_grad=True,
                                       device=device)
            # Give the FSA its own leaf tensor.  Assigning ``float_attr1``
            # itself would make the gradient comparison at the end compare
            # a tensor with itself.
            fsa1.float_attr = float_attr1.detach().clone().requires_grad_(
                True)
            fsa1.int_attr = torch.tensor([1, 2, 3, 4, 5, 6],
                                         dtype=torch.int32,
                                         device=device)
            fsa1.ragged_attr = k2.RaggedTensor(
                [[10, 20], [30, 40, 50], [60, 70], [80], [], [0]]).to(device)

            fsa2 = k2.Fsa.from_str(s).to(device).requires_grad_(True)
            scores_copy2 = fsa2.scores.detach().clone().requires_grad_(True)
            fsa2.attr2 = "k2"
            float_attr2 = torch.tensor([1, 2, 3, 40, 50, 60],
                                       dtype=torch.float32,
                                       requires_grad=True,
                                       device=device)
            # Same reason as for fsa1: keep reference and attribute apart.
            fsa2.float_attr = float_attr2.detach().clone().requires_grad_(
                True)
            fsa2.int_attr = torch.tensor([10, 20, 30, 4, 5, 6],
                                         dtype=torch.int32,
                                         device=device)
            fsa2.ragged_attr = k2.RaggedTensor([[100, 200], [300, 400, 500],
                                                [600, 700], [800], [22],
                                                [33, 55]]).to(device)

            src = k2.create_fsa_vec([fsa1, fsa2])

            dest = k2.remove_epsilon_self_loops(src)
            # arc map is [0, 1, 4, 5, 6, 7, 10, 11]

            # See https://git.io/JY7oC
            expected_fsa = k2.Fsa.from_str('''
                0 1 0 0.1
                0 2 0 0.2
                1 2 0 0.5
                2 3 -1 0.6
                3
            ''')
            assert k2.to_str_simple(dest[0]) == k2.to_str_simple(expected_fsa)
            assert k2.to_str_simple(dest[1]) == k2.to_str_simple(expected_fsa)

            assert dest.attr1 == fsa1.attr1
            assert dest.attr2 == fsa2.attr2

            expected_int_attr = torch.tensor([1, 2, 5, 6, 10, 20, 5, 6],
                                             dtype=torch.int32,
                                             device=device)
            assert torch.all(torch.eq(dest.int_attr, expected_int_attr))

            expected_ragged_attr = k2.RaggedTensor([[10, 20], [30, 40, 50], [],
                                                    [0], [100, 200],
                                                    [300, 400, 500], [22],
                                                    [33, 55]]).to(device)
            assert dest.ragged_attr == expected_ragged_attr

            # Output rows 0-3 come from the first FSA, rows 4-7 from the
            # second.  Element-wise copies keep the reference tensors in the
            # autograd graph of the expected values.
            expected_float_attr = torch.empty_like(dest.float_attr)
            for base, reference in ((0, float_attr1), (4, float_attr2)):
                for pos, arc in enumerate(kept_arcs):
                    expected_float_attr[base + pos] = reference[arc]

            assert torch.all(torch.eq(dest.float_attr, expected_float_attr))

            expected_scores = torch.empty_like(dest.scores)
            for base, reference in ((0, scores_copy1), (4, scores_copy2)):
                for pos, arc in enumerate(kept_arcs):
                    expected_scores[base + pos] = reference[arc]

            assert torch.all(torch.eq(dest.scores, expected_scores))

            scale = torch.tensor([10, 20, 30, 40, 50, 60, 70,
                                  80]).to(dest.float_attr)

            (dest.float_attr * scale).sum().backward()
            (expected_float_attr * scale).sum().backward()

            assert torch.all(torch.eq(fsa1.float_attr.grad, float_attr1.grad))
            assert torch.all(torch.eq(fsa2.float_attr.grad, float_attr2.grad))

            (dest.scores * scale).sum().backward()
            (expected_scores * scale).sum().backward()

            assert torch.all(torch.eq(fsa1.scores.grad, scores_copy1.grad))
            assert torch.all(torch.eq(fsa2.scores.grad, scores_copy2.grad))
Beispiel #19
0
    def test(self):
        """Run ``k2.replace_fsa`` on a small example and verify the result.

        For every device in ``self.devices`` this builds an FsaVec ``src``
        from three FSAs and an ``index`` FSA that carries an ``aux_label``
        attribute, calls ``k2.replace_fsa(src, index, 1)`` and asserts:

        * the textual form of the resulting FSA;
        * the ``aux_label`` values of the resulting FSA;
        * that back-propagating ``-dest.scores.sum()`` leaves a gradient of
          -1 on each of the 6 arcs of ``index`` and the 16 arcs of ``src``.
        """
        for device in self.devices:
            first_text = '''
            0 1 11 11
            0 2 12 12
            0 3 13 13
            1 4 -1 0
            2 4 -1 0
            3 4 -1 0
            4
            '''
            second_text = '''
            0 1 21 21
            0 2 22 22
            1 2 23 23
            1 3 -1 0
            2 3 -1 0
            3
            '''
            third_text = '''
            0 1 31 31
            1 2 32 32
            1 3 33 33
            2 4 -1 0
            3 4 -1 0
            4
            '''
            # The FSAs are parsed in the same order in which they are stacked.
            src = k2.create_fsa_vec([
                k2.Fsa.from_str(text)
                for text in (first_text, second_text, third_text)
            ]).to(device)
            src.requires_grad_(True)

            index_text = '''
            0 1 1 1
            0 2 2 2
            1 3 4 4
            2 3 3 3
            2 4 -1 0
            3 4 -1 0
            4
            '''
            index = k2.Fsa.from_str(index_text).to(device)
            index.requires_grad_(True)

            # One auxiliary label per arc of `index`.
            index.aux_label = torch.tensor([1, 2, 3, 4, 5, 6],
                                           dtype=torch.int32,
                                           device=device)

            # NOTE(review): the third argument (1) is presumably the first
            # label of `index` that designates an FSA of `src` -- confirm
            # against the k2.replace_fsa documentation.
            dest = k2.replace_fsa(src, index, 1)

            expected_str = '\n'.join([
                '0 1 0 1', '0 5 0 2', '1 2 11 11', '1 3 12 12', '1 4 13 13',
                '2 8 0 0', '3 8 0 0', '4 8 0 0', '5 6 21 21', '5 7 22 22',
                '6 7 23 23', '6 9 0 0', '7 9 0 0', '8 14 4 4', '9 10 0 3',
                '9 15 -1 0', '10 11 31 31', '11 12 32 32', '11 13 33 33',
                '12 14 0 0', '13 14 0 0', '14 15 -1 0', '15'
            ])
            assert k2.to_str_simple(dest).strip() == expected_str

            # One entry per arc of `dest` (22 arcs).
            expected_aux_label = torch.tensor([
                1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 5, 0, 0,
                0, 0, 0, 6
            ],
                                              dtype=torch.int32,
                                              device=device)
            assert torch.all(torch.eq(dest.aux_label, expected_aux_label))

            # d(-sum(scores)) / d(score) is expected to be -1 for every
            # arc of both inputs.
            (-dest.scores.sum()).backward()

            # `index` has 6 arcs.
            assert torch.allclose(index.grad,
                                  torch.tensor([-1] * 6).to(index.grad))

            # `src` has 6 + 5 + 5 = 16 arcs.
            assert torch.allclose(src.grad,
                                  torch.tensor([-1] * 16).to(src.grad))
Beispiel #20
0
    def test_acceptor_from_str(self):
        """Parse an acceptor with ``k2.Fsa.from_str`` in four spellings.

        The same text is parsed with no extra arguments, with
        ``acceptor=True``, with ``num_aux_labels=0`` and with
        ``aux_label_names=[]``; each resulting FSA must pass the same checks
        on its textual form, arcs and scores, and on assignment to
        ``scores`` and ``labels``.
        """
        fsa_text = '''
            0 1  2  -1.2
            0 2  10 -2.2
            1 6 -1  -3.2
            1 3  3  -4.2
            2 6 -1  -5.2
            2 4  2  -6.2
            3 6 -1  -7.2
            5 0  1  -8.2
            6
        '''

        expected_str = '''
            0 1 2 -1.2
            0 2 10 -2.2
            1 6 -1 -3.2
            1 3 3 -4.2
            2 6 -1 -5.2
            2 4 2 -6.2
            3 6 -1 -7.2
            5 0 1 -8.2
            6
            '''

        expected_scores = torch.tensor(
            [-1.2, -2.2, -3.2, -4.2, -5.2, -6.2, -7.2, -8.2],
            dtype=torch.float32)

        # Keyword arguments of the four from_str() calls under test.
        parse_options = (
            {},
            {'acceptor': True},
            {'num_aux_labels': 0},
            {'aux_label_names': []},
        )

        for options in parse_options:
            fsa = k2.Fsa.from_str(fsa_text, **options)

            assert _remove_leading_spaces(expected_str) == \
                    _remove_leading_spaces(k2.to_str_simple(fsa))

            # Drop the last column of the arc table and keep the three
            # integer columns, the first row of which must be [0, 1, 2].
            arcs = fsa.arcs.values()[:, :-1]
            assert isinstance(arcs, torch.Tensor)
            assert arcs.dtype == torch.int32
            assert arcs.device.type == 'cpu'
            assert arcs.shape == (8, 3), 'there should be 8 arcs'
            first_arc = torch.tensor([0, 1, 2], dtype=torch.int32)
            assert torch.all(torch.eq(arcs[0], first_arc))

            assert torch.allclose(fsa.scores, expected_scores)

            # In-place scaling goes through the `scores` setter; the
            # stored scores must flip sign.
            fsa.scores *= -1
            assert torch.allclose(fsa.scores, -expected_scores)

            # Assigning to `labels` should trigger _k2.fix_final_labels:
            # in the expected tensor, the arcs entering the final state 6
            # (indices 2, 4 and 6) carry -1, and the -1 assigned to arc 0
            # reads back as 0.
            fsa.labels = torch.tensor([-1, 10, 0, 1, -1, 1, 0, 2],
                                      dtype=torch.int32)
            fixed_labels = torch.tensor([0, 10, -1, 1, -1, 1, -1, 2],
                                        dtype=torch.int32)
            assert torch.all(torch.eq(fsa.labels, fixed_labels))
Beispiel #21
0
    def test_transducer3_from_openfst(self):
        """Parse an OpenFst-style transducer with three aux-label columns.

        The text is parsed twice with ``k2.Fsa.from_openfst``: once with
        ``num_aux_labels=3`` and once with explicit ``aux_label_names``.
        Both results must expose ``aux_labels``, ``aux_labels2`` and
        ``aux_labels3`` as int32 CPU tensors with the expected values, have
        the expected scores, and print (with ``openfst=True``) as the
        expected text, which contains only the first auxiliary label.

        The input declares two final states (7 with a weight, 6 without);
        the expectations contain two additional arcs whose labels and
        auxiliary labels are all -1 and which lead to a new state 8.
        """
        openfst_text = '''
            0 1  2  22  33  44  -1.2
            0 2  10 100 101 102 -2.2
            1 6  1  16  17  18  -4.2
            1 3  3  33  34  35  -3.2
            2 6  2  26  27  28  -5.2
            2 4  2  22  23  24  -6.2
            3 6  3  36  37  38  -7.2
            5 0  1  50  51  52  -8.2
            7 -9.2
            6
        '''

        # Keyword arguments of the two from_openfst() calls under test.
        parse_options = (
            {'num_aux_labels': 3},
            {'aux_label_names': ['aux_labels', 'aux_labels2', 'aux_labels3']},
        )

        # Attribute name -> expected per-arc values; the last two entries
        # belong to the arcs into the new final state.
        expected_aux = (
            ('aux_labels', [22, 100, 16, 33, 26, 22, 36, 50, -1, -1]),
            ('aux_labels2', [33, 101, 17, 34, 27, 23, 37, 51, -1, -1]),
            ('aux_labels3', [44, 102, 18, 35, 28, 24, 38, 52, -1, -1]),
        )

        # The expected scores have the opposite sign of the numbers in
        # the text.
        expected_scores = torch.tensor(
            [1.2, 2.2, 4.2, 3.2, 5.2, 6.2, 7.2, 8.2, 0, 9.2],
            dtype=torch.float32)

        expected_str = '''
                0 1 2 22 -1.2
                0 2 10 100 -2.2
                1 6 1 16 -4.2
                1 3 3 33 -3.2
                2 6 2 26 -5.2
                2 4 2 22 -6.2
                3 6 3 36 -7.2
                5 0 1 50 -8.2
                6 8 -1 -1 0
                7 8 -1 -1 -9.2
                8
            '''

        for options in parse_options:
            fsa = k2.Fsa.from_openfst(openfst_text, **options)

            for attr_name, values in expected_aux:
                aux = getattr(fsa, attr_name)
                assert aux.dtype == torch.int32
                assert aux.device.type == 'cpu'
                assert torch.all(
                    torch.eq(aux, torch.tensor(values, dtype=torch.int32)))

            assert torch.allclose(fsa.scores, expected_scores)

            assert _remove_leading_spaces(expected_str) == \
                    _remove_leading_spaces(k2.to_str_simple(fsa, openfst=True))
Beispiel #22
0
    def test(self):
        """Check ``k2.arc_sort``: arcs, arc map, attributes and gradients.

        For every device in ``self.devices`` a three-arc FSA is decorated
        with string, float, int and ragged attributes and then sorted with
        ``k2.arc_sort(src, ret_arc_map=True)``.  The test asserts that

        * string attributes are carried over unchanged;
        * the returned arc map is ``[1, 0, 2]``;
        * the int, ragged and float attributes and the scores of the result
          are the source values reordered by that arc map;
        * gradients flowing back through ``dest.float_attr`` and
          ``dest.scores`` equal those of a hand-built reference.
        """
        s = '''
            0 1 2 0.1
            0 1 1 0.2
            1 2 -1 0.3
            2
        '''
        # Expected arc map: arc i of the result is arc permutation[i] of src.
        permutation = [1, 0, 2]

        for device in self.devices:
            src = k2.Fsa.from_str(s).to(device)
            src.requires_grad_(True)

            # Independent leaf holding the same values as src.scores; used
            # to compute the reference gradient.
            scores_copy = src.scores.detach().clone().requires_grad_(True)

            src.attr1 = "hello"
            src.attr2 = "k2"
            float_attr = torch.tensor([0.1, 0.2, 0.3],
                                      dtype=torch.float32,
                                      requires_grad=True,
                                      device=device)
            # src gets its own leaf so that its gradient can be compared
            # with the one accumulated on `float_attr`.
            src.float_attr = float_attr.detach().clone().requires_grad_(True)
            src.int_attr = torch.tensor([1, 2, 3],
                                        dtype=torch.int32,
                                        device=device)
            src.ragged_attr = k2.RaggedTensor([[10, 20], [30, 40, 50],
                                               [60, 70]]).to(device)

            dest, arc_map = k2.arc_sort(src, ret_arc_map=True)

            assert dest.attr1 == src.attr1
            assert dest.attr2 == src.attr2

            expected_arc_map = torch.tensor(permutation,
                                            dtype=torch.int32,
                                            device=device)
            assert torch.all(torch.eq(arc_map, expected_arc_map))

            expected_str = '\n'.join(
                ['0 1 1 0.2', '0 1 2 0.1', '1 2 -1 0.3', '2'])
            assert k2.to_str_simple(dest).strip() == expected_str

            expected_int_attr = torch.tensor([2, 1, 3],
                                             dtype=torch.int32,
                                             device=device)
            assert torch.all(torch.eq(dest.int_attr, expected_int_attr))

            expected_ragged_attr = k2.RaggedTensor([[30, 40, 50], [10, 20],
                                                    [60, 70]]).to(device)
            assert dest.ragged_attr == expected_ragged_attr

            # Reference float attribute: `float_attr` reordered by the
            # expected arc map, filled element by element.
            expected_float_attr = torch.empty_like(dest.float_attr)
            for dest_arc, src_arc in enumerate(permutation):
                expected_float_attr[dest_arc] = float_attr[src_arc]

            assert torch.all(torch.eq(dest.float_attr, expected_float_attr))

            # Reference scores, built the same way from `scores_copy`.
            expected_scores = torch.empty_like(dest.scores)
            for dest_arc, src_arc in enumerate(permutation):
                expected_scores[dest_arc] = scores_copy[src_arc]

            assert torch.all(torch.eq(dest.scores, expected_scores))

            # Distinct weights per arc so that a wrong permutation shows
            # up in the gradients.
            scale = torch.tensor([10, 20, 30]).to(float_attr)

            (dest.float_attr * scale).sum().backward()
            (expected_float_attr * scale).sum().backward()
            assert torch.all(torch.eq(src.float_attr.grad, float_attr.grad))

            (dest.scores * scale).sum().backward()
            (expected_scores * scale).sum().backward()
            assert torch.all(torch.eq(src.scores.grad, scores_copy.grad))