Exemple #1
0
 def test_split_slf_line(self):
     lattice = SLFLattice(None)
     fields = lattice._split_slf_line('name=value '
                                      'name="va lue" '
                                      'WORD=\\"QUOTE '
                                      "WORD='CAUSE")
     self.assertEqual(fields[0], 'name=value')
     self.assertEqual(fields[1], 'name=va lue')
     self.assertEqual(fields[2], 'WORD="QUOTE')
     self.assertEqual(fields[3], "WORD='CAUSE")
Exemple #2
0
    def test_slf_to_kaldi(self):
        with open(self.wordmap_path, 'r') as wordmap_file:
            word_to_id = read_kaldi_vocabulary(wordmap_file)
        id_to_word = [None] * len(word_to_id)
        for word, id in word_to_id.items():
            id_to_word[id] = word

        with open(self.slf_path, 'r') as slf_file:
            lattice = SLFLattice(slf_file)
        self._assert_lattice_is_correct(lattice)
        buffer = StringIO()
        lattice.write_kaldi(buffer, word_to_id)
        lattice = KaldiLattice(buffer.getvalue().splitlines(), id_to_word)
        self._assert_lattice_is_correct(lattice)
Exemple #3
0
    def __iter__(self):
        """A generator for iterating through the lattices of this job.
        """

        file_type = TextFileType('r')

        for path in self._lattices:
            logging.info("Reading lattice file `%s´.", path)
            lattice_file = file_type(path)
            if self._lattice_format == 'slf':
                yield SLFLattice(lattice_file)
            else:
                assert self._lattice_format == 'kaldi'
                lattice_lines = []
                id_to_word = self.kaldi_id_to_word
                while True:
                    line = lattice_file.readline()
                    if not line:
                        # end of file
                        if lattice_lines:
                            yield KaldiLattice(lattice_lines, id_to_word)
                        break
                    line = line.strip()
                    if not line:
                        # empty line
                        if lattice_lines:
                            yield KaldiLattice(lattice_lines, id_to_word)
                        lattice_lines = []
                        continue
                    lattice_lines.append(line)
Exemple #4
0
    def test_sorted_nodes(self):
        lattice = Lattice()
        lattice.nodes = [Lattice.Node(id) for id in range(9)]
        lattice.nodes[0].time = 0.0
        lattice.nodes[2].time = 1.0
        lattice.nodes[4].time = 2.0
        lattice.nodes[3].time = 3.0
        lattice.nodes[5].time = 4.0
        lattice.nodes[1].time = 4.0
        lattice.nodes[6].time = 5.0
        lattice.nodes[7].time = None
        lattice.nodes[8].time = -1.0
        lattice._add_link(lattice.nodes[0], lattice.nodes[2])
        lattice._add_link(lattice.nodes[0], lattice.nodes[4])
        lattice._add_link(lattice.nodes[2], lattice.nodes[3])
        lattice._add_link(lattice.nodes[4], lattice.nodes[3])
        lattice._add_link(lattice.nodes[2], lattice.nodes[5])
        lattice._add_link(lattice.nodes[3], lattice.nodes[5])
        lattice._add_link(lattice.nodes[5], lattice.nodes[1])
        lattice._add_link(lattice.nodes[5], lattice.nodes[6])
        lattice._add_link(lattice.nodes[5], lattice.nodes[7])
        lattice._add_link(lattice.nodes[1], lattice.nodes[8])
        lattice._add_link(lattice.nodes[6], lattice.nodes[8])
        lattice._add_link(lattice.nodes[7], lattice.nodes[8])
        lattice.initial_node = lattice.nodes[0]
        lattice.final_node = lattice.nodes[8]

        sorted_nodes = lattice.sorted_nodes()
        self.assertEqual(sorted_nodes[0].id, 0)
        self.assertEqual(sorted_nodes[1].id, 2)
        self.assertEqual(sorted_nodes[2].id, 4)
        self.assertEqual(sorted_nodes[3].id, 3)
        self.assertEqual(sorted_nodes[4].id, 5)
        # Topologically equal nodes will be sorted in ascending time. The nodes
        # that don't have time will go last.
        self.assertEqual(sorted_nodes[5].id, 1)
        self.assertEqual(sorted_nodes[6].id, 6)
        self.assertEqual(sorted_nodes[7].id, 7)
        self.assertEqual(sorted_nodes[8].id, 8)

        with open(self.lattice_path, 'r') as lattice_file:
            lattice = SLFLattice(lattice_file)

        def reachable(initial_node, node):
            result = False
            for link in initial_node.out_links:
                if link.end_node is node:
                    result = True
            return result

        sorted_nodes = lattice.sorted_nodes()
        for left_node, right_node in zip(sorted_nodes, sorted_nodes[1:]):
            if (not left_node.time is None) and (not right_node.time is None):
                self.assertLessEqual(left_node.time, right_node.time)
                self.assertFalse(reachable(right_node, left_node))
    def setUp(self):
        self.maxDiff = None
        script_path = os.path.dirname(os.path.realpath(__file__))

        vocabulary_path = os.path.join(script_path, 'vocabulary.txt')
        with open(vocabulary_path) as vocabulary_file:
            self.vocabulary = Vocabulary.from_file(vocabulary_file,
                                                   'words',
                                                   oos_words=['oos1', 'oos2'])
        self.vocabulary.compute_probs({
            'yksi': 1,
            'kaksi': 1,
            'kolme': 1,
            'neljä': 1,
            'viisi': 1,
            'kuusi': 1,
            'seitsemän': 1,
            'kahdeksan': 1,
            'yhdeksän': 1,
            'kymmenen': 1,
            'oos1': 1,
            'oos2': 2
        })

        self.sos_id = self.vocabulary.word_to_id['<s>']
        self.yksi_id = self.vocabulary.word_to_id['yksi']
        self.kaksi_id = self.vocabulary.word_to_id['kaksi']
        self.eos_id = self.vocabulary.word_to_id['</s>']
        self.unk_id = self.vocabulary.word_to_id['<unk>']
        self.oos1_id = self.vocabulary.word_to_id['oos1']
        self.oos2_id = self.vocabulary.word_to_id['oos2']

        projection_vector = tensor.zeros(
            shape=(self.vocabulary.num_shortlist_words(), ),
            dtype=theano.config.floatX)
        self.sos_prob = 0.1
        projection_vector = tensor.set_subtensor(
            projection_vector[self.sos_id], self.sos_prob)
        self.yksi_prob = 0.2
        projection_vector = tensor.set_subtensor(
            projection_vector[self.yksi_id], self.yksi_prob)
        self.kaksi_prob = 0.3
        projection_vector = tensor.set_subtensor(
            projection_vector[self.kaksi_id], self.kaksi_prob)
        self.eos_prob = 0.4
        projection_vector = tensor.set_subtensor(
            projection_vector[self.eos_id], self.eos_prob)
        self.unk_prob = 0.3
        projection_vector = tensor.set_subtensor(
            projection_vector[self.unk_id], self.unk_prob)
        self.network = DummyNetwork(self.vocabulary, projection_vector)

        lattice_path = os.path.join(script_path, 'lattice.slf')
        with open(lattice_path) as lattice_file:
            self.lattice = SLFLattice(lattice_file)
Exemple #6
0
 def test_read(self):
     lattice = SLFLattice()
     with open(self.lattice_path, 'r') as lattice_file:
         lattice.read(lattice_file)
     self.assertEqual(len(lattice.nodes), 24)
     self.assertEqual(len(lattice.links), 39)
Exemple #7
0
    def test_read_slf_link(self):
        lattice = SLFLattice(None)
        lattice.nodes = [Lattice.Node(id) for id in range(4)]
        lattice.links = []
        lattice._read_slf_node(0, ['t=0.0'])
        lattice._read_slf_node(1, ['t=1.0'])
        lattice._read_slf_node(2, ['t=2.0'])
        lattice._read_slf_node(3, ['t=3.0'])
        lattice._read_slf_link(0, ['START=0', 'END=1'])
        lattice._read_slf_link(1, ['S=1', 'E=2', 'WORD=wo rd', 'acoustic=-0.1', 'language=-0.2'])
        lattice._read_slf_link(2, ['S=2', 'E=3', 'W=word', 'a=-0.3', 'l=-0.4'])
        lattice._read_slf_link(3, ['S=1', 'E=3', 'a=-0.5', 'l=-0.6'])

        self.assertTrue(lattice.links[0].start_node is lattice.nodes[0])
        self.assertTrue(lattice.links[0].end_node is lattice.nodes[1])
        self.assertTrue(lattice.links[1].start_node is lattice.nodes[1])
        self.assertTrue(lattice.links[1].end_node is lattice.nodes[2])
        self.assertEqual(lattice.links[1].word, 'wo rd')
        self.assertEqual(lattice.links[1].ac_logprob, -0.1)
        self.assertEqual(lattice.links[1].lm_logprob, -0.2)
        self.assertTrue(lattice.links[2].start_node is lattice.nodes[2])
        self.assertTrue(lattice.links[2].end_node is lattice.nodes[3])
        self.assertEqual(lattice.links[2].word, 'word')
        self.assertEqual(lattice.links[2].ac_logprob, -0.3)
        self.assertEqual(lattice.links[2].lm_logprob, -0.4)
        self.assertTrue(lattice.links[3].start_node is lattice.nodes[1])
        self.assertTrue(lattice.links[3].end_node is lattice.nodes[3])
        self.assertEqual(lattice.links[3].ac_logprob, -0.5)
        self.assertEqual(lattice.links[3].lm_logprob, -0.6)

        self.assertEqual(len(lattice.nodes[0].in_links), 0)
        self.assertEqual(len(lattice.nodes[0].out_links), 1)
        self.assertEqual(len(lattice.nodes[1].in_links), 1)
        self.assertEqual(len(lattice.nodes[1].out_links), 2)
        self.assertEqual(len(lattice.nodes[2].in_links), 1)
        self.assertEqual(len(lattice.nodes[2].out_links), 1)
        self.assertEqual(len(lattice.nodes[3].in_links), 2)
        self.assertEqual(len(lattice.nodes[3].out_links), 0)

        self.assertEqual(lattice.nodes[0].out_links[0].end_node.time, 1.0)
        self.assertEqual(lattice.nodes[1].in_links[0].start_node.time, 0.0)
        self.assertEqual(lattice.nodes[1].out_links[0].end_node.time, 2.0)
        self.assertEqual(lattice.nodes[1].out_links[1].end_node.time, 3.0)
        self.assertEqual(lattice.nodes[2].in_links[0].start_node.time, 1.0)
        self.assertEqual(lattice.nodes[2].out_links[0].end_node.time, 3.0)
        self.assertEqual(lattice.nodes[3].in_links[0].start_node.time, 2.0)
        self.assertEqual(lattice.nodes[3].in_links[1].start_node.time, 1.0)
Exemple #8
0
    def test_read_slf_node(self):
        lattice = SLFLattice(None)
        lattice.nodes = [Lattice.Node(id) for id in range(5)]
        lattice._read_slf_node(0, [])
        lattice._read_slf_node(1, ['t=1.0'])
        lattice._read_slf_node(2, ['time=2.1'])
        lattice._read_slf_node(3, ['t=3.0', 'WORD=wo rd'])
        lattice._read_slf_node(4, ['time=4.1', 'W=word'])

        self.assertEqual(lattice.nodes[1].time, 1.0)
        self.assertEqual(lattice.nodes[2].time, 2.1)
        self.assertEqual(lattice.nodes[3].time, 3.0)
        self.assertEqual(lattice.nodes[3].word, 'wo rd')
        self.assertEqual(lattice.nodes[4].time, 4.1)
        self.assertEqual(lattice.nodes[4].word, 'word')
Exemple #9
0
 def test_read_slf_header(self):
     lattice = SLFLattice(None)
     lattice._read_slf_header(['UTTERANCE=utterance #123'])
     self.assertEqual(lattice.utterance_id, 'utterance #123')
     lattice._read_slf_header(['U=utterance #456'])
     self.assertEqual(lattice.utterance_id, 'utterance #456')
     lattice._read_slf_header(['base=10', 'lmscale=1.2', 'wdpenalty=1.3'])
     self.assertEqual(math.log(0.1, 10.0) * lattice._log_scale, math.log(0.1))
     self.assertEqual(lattice.lm_scale, 1.2)
     self.assertEqual(lattice.wi_penalty, 1.3)
     lattice._read_slf_header(['start=2', 'end=3'])
     self.assertEqual(lattice._initial_node_id, 2)
     self.assertEqual(lattice._final_node_id, 3)
     lattice._read_slf_header(['NODES=5', 'LINKS=7'])
     self.assertEqual(lattice._num_nodes, 5)
     self.assertEqual(lattice._num_links, 7)
     lattice._read_slf_header(['N=8', 'L=9'])
     self.assertEqual(lattice._num_nodes, 8)
     self.assertEqual(lattice._num_links, 9)
Exemple #10
0
 def test_split_slf_field(self):
     lattice = SLFLattice(None)
     name, value = _split_slf_field("name=va 'lue")
     self.assertEqual(name, 'name')
     self.assertEqual(value, "va 'lue")
Exemple #11
0
 def test_init(self):
     with open(self.lattice_path, 'r') as lattice_file:
         lattice = SLFLattice(lattice_file)
     self.assertEqual(len(lattice.nodes), 24)
     self.assertEqual(len(lattice.links), 39)
Exemple #12
0
 def test_move_words_to_links(self):
     lattice = SLFLattice(None)
     lattice.nodes = [Lattice.Node(id) for id in range(5)]
     lattice.nodes[0].word = 'A'
     lattice.nodes[1].word = 'B'
     lattice.nodes[2].word = 'C'
     lattice.nodes[3].word = 'D'
     lattice.nodes[4].word = 'E'
     lattice.initial_node = lattice.nodes[0]
     lattice.final_node = lattice.nodes[4]
     lattice._add_link(lattice.nodes[0], lattice.nodes[1])
     lattice._add_link(lattice.nodes[0], lattice.nodes[2])
     lattice._add_link(lattice.nodes[1], lattice.nodes[3])
     lattice._add_link(lattice.nodes[2], lattice.nodes[3])
     lattice._add_link(lattice.nodes[3], lattice.nodes[4])
     lattice._move_words_to_links()
     self.assertEqual(lattice.links[0].word, 'B')
     self.assertEqual(lattice.links[1].word, 'C')
     self.assertEqual(lattice.links[2].word, 'D')
     self.assertEqual(lattice.links[3].word, 'D')
     self.assertEqual(lattice.links[4].word, 'E')
     for node in lattice.nodes:
         self.assertFalse(hasattr(node, 'word'))
Exemple #13
0
 def test_move_words_to_links(self):
     lattice = SLFLattice(None)
     lattice.nodes = [Lattice.Node(id) for id in range(5)]
     lattice.nodes[0].word = 'A'
     lattice.nodes[1].word = 'B'
     lattice.nodes[2].word = 'C'
     lattice.nodes[3].word = 'D'
     lattice.nodes[4].word = 'E'
     lattice.initial_node = lattice.nodes[0]
     lattice.final_node = lattice.nodes[4]
     lattice._add_link(lattice.nodes[0], lattice.nodes[1])
     lattice._add_link(lattice.nodes[0], lattice.nodes[2])
     lattice._add_link(lattice.nodes[1], lattice.nodes[3])
     lattice._add_link(lattice.nodes[2], lattice.nodes[3])
     lattice._add_link(lattice.nodes[3], lattice.nodes[4])
     lattice._move_words_to_links()
     self.assertEqual(lattice.links[0].word, 'B')
     self.assertEqual(lattice.links[1].word, 'C')
     self.assertEqual(lattice.links[2].word, 'D')
     self.assertEqual(lattice.links[3].word, 'D')
     self.assertEqual(lattice.links[4].word, 'E')
     for node in lattice.nodes:
         self.assertFalse(hasattr(node, 'word'))
Exemple #14
0
    def test_read_slf_link(self):
        lattice = SLFLattice(None)
        lattice.nodes = [Lattice.Node(id) for id in range(4)]
        lattice.links = []
        lattice._read_slf_node(0, ['t=0.0'])
        lattice._read_slf_node(1, ['t=1.0'])
        lattice._read_slf_node(2, ['t=2.0'])
        lattice._read_slf_node(3, ['t=3.0'])
        lattice._read_slf_link(0, ['START=0', 'END=1'])
        lattice._read_slf_link(1, ['S=1', 'E=2', 'WORD=wo rd', 'acoustic=-0.1', 'language=-0.2'])
        lattice._read_slf_link(2, ['S=2', 'E=3', 'W=word', 'a=-0.3', 'l=-0.4'])
        lattice._read_slf_link(3, ['S=1', 'E=3', 'a=-0.5', 'l=-0.6'])

        self.assertTrue(lattice.links[0].start_node is lattice.nodes[0])
        self.assertTrue(lattice.links[0].end_node is lattice.nodes[1])
        self.assertTrue(lattice.links[1].start_node is lattice.nodes[1])
        self.assertTrue(lattice.links[1].end_node is lattice.nodes[2])
        self.assertEqual(lattice.links[1].word, 'wo rd')
        self.assertEqual(lattice.links[1].ac_logprob, -0.1)
        self.assertEqual(lattice.links[1].lm_logprob, -0.2)
        self.assertTrue(lattice.links[2].start_node is lattice.nodes[2])
        self.assertTrue(lattice.links[2].end_node is lattice.nodes[3])
        self.assertEqual(lattice.links[2].word, 'word')
        self.assertEqual(lattice.links[2].ac_logprob, -0.3)
        self.assertEqual(lattice.links[2].lm_logprob, -0.4)
        self.assertTrue(lattice.links[3].start_node is lattice.nodes[1])
        self.assertTrue(lattice.links[3].end_node is lattice.nodes[3])
        self.assertEqual(lattice.links[3].ac_logprob, -0.5)
        self.assertEqual(lattice.links[3].lm_logprob, -0.6)

        self.assertEqual(len(lattice.nodes[0].in_links), 0)
        self.assertEqual(len(lattice.nodes[0].out_links), 1)
        self.assertEqual(len(lattice.nodes[1].in_links), 1)
        self.assertEqual(len(lattice.nodes[1].out_links), 2)
        self.assertEqual(len(lattice.nodes[2].in_links), 1)
        self.assertEqual(len(lattice.nodes[2].out_links), 1)
        self.assertEqual(len(lattice.nodes[3].in_links), 2)
        self.assertEqual(len(lattice.nodes[3].out_links), 0)

        self.assertEqual(lattice.nodes[0].out_links[0].end_node.time, 1.0)
        self.assertEqual(lattice.nodes[1].in_links[0].start_node.time, 0.0)
        self.assertEqual(lattice.nodes[1].out_links[0].end_node.time, 2.0)
        self.assertEqual(lattice.nodes[1].out_links[1].end_node.time, 3.0)
        self.assertEqual(lattice.nodes[2].in_links[0].start_node.time, 1.0)
        self.assertEqual(lattice.nodes[2].out_links[0].end_node.time, 3.0)
        self.assertEqual(lattice.nodes[3].in_links[0].start_node.time, 2.0)
        self.assertEqual(lattice.nodes[3].in_links[1].start_node.time, 1.0)
Exemple #15
0
    def test_read_slf_node(self):
        lattice = SLFLattice(None)
        lattice.nodes = [Lattice.Node(id) for id in range(5)]
        lattice._read_slf_node(0, [])
        lattice._read_slf_node(1, ['t=1.0'])
        lattice._read_slf_node(2, ['time=2.1'])
        lattice._read_slf_node(3, ['t=3.0', 'WORD=wo rd'])
        lattice._read_slf_node(4, ['time=4.1', 'W=word'])

        self.assertEqual(lattice.nodes[1].time, 1.0)
        self.assertEqual(lattice.nodes[2].time, 2.1)
        self.assertEqual(lattice.nodes[3].time, 3.0)
        self.assertEqual(lattice.nodes[3].word, 'wo rd')
        self.assertEqual(lattice.nodes[4].time, 4.1)
        self.assertEqual(lattice.nodes[4].word, 'word')
Exemple #16
0
 def test_read_slf_header(self):
     lattice = SLFLattice(None)
     lattice._read_slf_header(['UTTERANCE=utterance #123'])
     self.assertEqual(lattice.utterance_id, 'utterance #123')
     lattice._read_slf_header(['U=utterance #456'])
     self.assertEqual(lattice.utterance_id, 'utterance #456')
     lattice._read_slf_header(['base=10', 'lmscale=1.2', 'wdpenalty=1.3'])
     self.assertEqual(math.log(0.1, 10.0) * lattice._log_scale, math.log(0.1))
     self.assertEqual(lattice.lm_scale, 1.2)
     self.assertEqual(lattice.wi_penalty, 1.3)
     lattice._read_slf_header(['start=2', 'end=3'])
     self.assertEqual(lattice._initial_node_id, 2)
     self.assertEqual(lattice._final_node_id, 3)
     lattice._read_slf_header(['NODES=5', 'LINKS=7'])
     self.assertEqual(lattice._num_nodes, 5)
     self.assertEqual(lattice._num_links, 7)
     lattice._read_slf_header(['N=8', 'L=9'])
     self.assertEqual(lattice._num_nodes, 8)
     self.assertEqual(lattice._num_links, 9)
Exemple #17
0
 def test_split_slf_field(self):
     lattice = SLFLattice(None)
     name, value = lattice._split_slf_field("name=va 'lue")
     self.assertEqual(name, 'name')
     self.assertEqual(value, "va 'lue")