def test_add_forward_end(self):
    """Feed a three-character string into a Lattice one suffix at a time.

    After the dictionary entries for each suffix are added, the sizes of
    selected ``snodes``/``enodes`` slots are checked and ``forward()`` is
    expected to return 1.  Finally ``end()`` must leave an EOS instance at
    ``snodes[4][0]`` and at ``enodes[5][0]``.
    """
    text = u'すもも'
    lattice = Lattice(len(text), SYS_DIC)

    # One row per suffix offset:
    #   (offset,
    #    ((snodes slot, expected size), ...),
    #    ((enodes slot, expected size), ...))
    expected_by_offset = (
        (0, ((1, 9),), ((2, 7), (3, 1), (4, 1))),
        (1, ((2, 4),), ((3, 3), (4, 3))),
        (2, ((3, 2),), ((4, 5),)),
    )
    for offset, snode_sizes, enode_sizes in expected_by_offset:
        for entry in SYS_DIC.lookup(text[offset:]):
            lattice.add(Node(entry))
        for slot, size in snode_sizes:
            self.assertEqual(size, len(lattice.snodes[slot]))
        for slot, size in enode_sizes:
            self.assertEqual(size, len(lattice.enodes[slot]))
        # Every step of this input is expected to advance by exactly one.
        self.assertEqual(1, lattice.forward())

    lattice.end()
    self.assertTrue(isinstance(lattice.snodes[4][0], EOS))
    self.assertTrue(isinstance(lattice.enodes[5][0], EOS))
def test_backward(self):
    """Build a full lattice for a sample sentence and check ``backward()``.

    The returned path must hold nine elements: a BOS instance first, seven
    nodes with the expected surfaces, and an EOS instance last.
    """
    text = u'すもももももももものうち'
    lattice = Lattice(len(text), SYS_DIC)

    # Add every entry found for the remaining text, then advance by
    # whatever forward() reports.
    offset = 0
    while offset < len(text):
        for entry in SYS_DIC.lookup(text[offset:]):
            lattice.add(Node(entry))
        offset += lattice.forward()
    lattice.end()

    best_path = lattice.backward()
    self.assertEqual(9, len(best_path))
    self.assertTrue(isinstance(best_path[0], BOS))

    # Surfaces expected at path positions 1..7, in order.
    expected_surfaces = (
        u'すもも',
        u'も',
        u'もも',
        u'も',
        u'もも',
        u'の',
        u'うち',
    )
    for position, surface in enumerate(expected_surfaces, 1):
        self.assertEqual(surface, best_path[position].surface)

    self.assertTrue(isinstance(best_path[8], EOS))
# NOTE(review): the statements down to `return path` are the tail of a method
# whose `def` line (and the initialisation of `pos` and `path`) is outside this
# view. The layout below is reconstructed from whitespace-collapsed source;
# confirm the indentation against the original file.
        index = 0
        # Follow the (back_pos, back_index) links stored on each node until
        # the position becomes negative.
        while pos >= 0:
            node = self.snodes[pos][index]
            path.append(node)
            index = node.back_index
            pos = node.back_pos
        # Nodes were appended in visiting order; flip the list before returning
        # (presumably so the path reads start-to-end -- TODO confirm).
        path.reverse()
        return path

    def __str__(self):
        """Return one line per entry of ``self.snodes``, that entry's nodes joined by commas."""
        return '\n'.join(','.join(str(node) for node in nodes) for nodes in self.snodes)


if __name__ == '__main__':
    # Demo: analyse a sample sentence and print surface / part of speech
    # for each Node on the path returned by backward().
    from sysdic import SYS_DIC
    s = u'4日夜、満月が地球の影に完全に入る「皆既月食」が起きた。'
    lattice = Lattice(len(s), SYS_DIC)
    pos = 0
    # Add every dictionary entry found for the remaining text, then advance
    # by the amount forward() returns.
    while pos < len(s):
        entries = SYS_DIC.lookup(s[pos:])
        for e in entries:
            lattice.add(Node(e))
        pos += lattice.forward()
    lattice.end()
    #print(str(lattice))
    min_cost_path = lattice.backward()
    for node in min_cost_path:
        # Only Node instances are printed; any other path element is skipped.
        if isinstance(node, Node):
            print(node.surface + '\t' + node.part_of_speech)
# NOTE(review): the statements down to `return path` are the tail of a method
# whose `def` line (and the initialisation of `pos`, `index` and `path`) is
# outside this view. The layout below is reconstructed from whitespace-collapsed
# source; confirm the indentation against the original file.
        # Follow the (back_pos, back_index) links stored on each node until
        # the position becomes negative.
        while pos >= 0:
            node = self.snodes[pos][index]
            path.append(node)
            index = node.back_index
            pos = node.back_pos
        # Nodes were appended in visiting order; flip the list before returning
        # (presumably so the path reads start-to-end -- TODO confirm).
        path.reverse()
        return path

    def __str__(self):
        """Return one line per entry of ``self.snodes``, that entry's nodes joined by commas."""
        return '\n'.join(','.join(str(node) for node in nodes) for nodes in self.snodes)


if __name__ == '__main__':
    # Demo: analyse a sample sentence and print surface / part of speech
    # for each Node on the path returned by backward().
    from sysdic import SYS_DIC
    s = u'4日夜、満月が地球の影に完全に入る「皆既月食」が起きた。'
    lattice = Lattice(len(s), SYS_DIC)
    pos = 0
    # Add every dictionary entry found for the remaining text, then advance
    # by the amount forward() returns.
    while pos < len(s):
        entries = SYS_DIC.lookup(s[pos:])
        for e in entries:
            lattice.add(Node(e))
        pos += lattice.forward()
    lattice.end()
    #print(str(lattice))
    min_cost_path = lattice.backward()
    for node in min_cost_path:
        # Only Node instances are printed; any other path element is skipped.
        if isinstance(node, Node):
            print(node.surface + '\t' + node.part_of_speech)