def test_translit_xy_missing(self):
     """Transliterate x-to-y when the input has a symbol outside the alphabet.

     The input ``'a_a'`` contains ``'_'``, which was not passed to the x
     alphabet; the expected output is ``'bb'``.
     """
     x_alphabet = Alphabet('', 'a', 'aa')
     y_alphabet = Alphabet('', 'b', 'c', 'bc')
     ep = EditProbability(x_alphabet, y_alphabet, lambda x, y: 0.0)
     # Set two pair values by hand, in the same order as a literal
     # sequence of assignments would.
     for pair, value in ((('a', 'b'), 0.1), (('aa', 'bc'), 0.5)):
         ep.probs[pair] = value
     transliterate = ep.transliterator_x2y()
     self.assertEqual(transliterate('a_a'), 'bb')
 def test_translit_yx(self):
     """Transliterate y-to-x and expect ``'bc'`` to come back as ``'aa'``."""
     x_alphabet = Alphabet('', 'a', 'aa')
     y_alphabet = Alphabet('', 'b', 'c', 'bc')
     ep = EditProbability(x_alphabet, y_alphabet, lambda x, y: 0.0)
     # Hand-set pair values, applied in listing order.
     hand_set = (
         (('a', 'c'), 0.5),
         (('a', 'b'), 0.5),
         (('', 'bc'), 0.1),
     )
     for pair, value in hand_set:
         ep.probs[pair] = value
     transliterate = ep.transliterator_y2x()
     self.assertEqual(transliterate('bc'), 'aa')
 def test_oov(self):
     """Score a pair whose y side contains a symbol missing from the y trie.

     The y trie is filled with ``'b'`` and ``''`` only, so the ``'c'`` in
     ``'bc'`` was never added; the score is expected to be almost 0.
     """
     x_trie = Trie()
     for symbol in ('', 'a'):
         x_trie.add(symbol)
     y_trie = Trie()
     for symbol in ('b', ''):
         y_trie.add(symbol)
     ep = EditProbability(Alphabet(x_trie), Alphabet(y_trie))
     score = ep.score('a', 'bc')
     self.assertAlmostEqual(score, 0)
 def test_from_data(self):
     """Compare a model built by ``from_data`` with one trained by hand.

     ``ep1`` comes from ``EditProbability.from_data`` reading
     ``tests/files/ep_from_data.txt``; ``ep2`` is built from in-memory
     pairs and passed through ``iterative_update`` the same number of
     times. The two are compared with ``assertCountEqual``.
     """
     rounds = 3  # used for both from_data's iterations and the manual loop
     ep1 = EditProbability.from_data(
         'tests/files/ep_from_data.txt',
         max_lines=3,
         unigram_limit=3,
         iterations=rounds,
         init_func=lambda x, y: 0.5,
     )
     # NOTE(review): presumably the fixture file holds these same three
     # pairs -- confirm against tests/files/ep_from_data.txt.
     data = [('a', 'x'), ('ab', 'xy'), ('abc', 'xyz')]
     x_words = [pair[0] for pair in data]
     y_words = [pair[1] for pair in data]
     ax = Alphabet(x_words)
     ay = Alphabet(y_words)
     ep2 = EditProbability(ax, ay, lambda x, y: 0.5)
     for _ in range(rounds):
         ep2.iterative_update(data)
     self.assertCountEqual(ep1, ep2)
 def test_score(self):
     """Check ``score('a', 'bc')`` against hand-set pair probabilities.

     The expected value is 0.34 (compared with ``assertAlmostEqual``).
     """
     x_trie = Trie()
     for symbol in ('', 'a'):
         x_trie.add(symbol)
     y_trie = Trie()
     for symbol in ('b', 'c', 'bc', ''):
         y_trie.add(symbol)
     ep = EditProbability(Alphabet(x_trie), Alphabet(y_trie))
     # NOTE(review): 0.34 matches 3*(0.5*0.4*0.3) + 2*(0.5*0.1) + 0.2*0.3,
     # assuming score() sums the products over all edit sequences --
     # confirm against EditProbability.score.
     hand_set = (
         (('a', ''), 0.5),
         (('', 'b'), 0.4),
         (('', 'c'), 0.3),
         (('a', 'b'), 0.2),
         (('', 'bc'), 0.1),
         (('a', 'bc'), 0.0),
         (('a', 'c'), 0.0),
     )
     for pair, value in hand_set:
         ep.probs[pair] = value
     score = ep.score('a', 'bc')
     self.assertAlmostEqual(score, 0.34)
 def test_forward(self):
     """Check the ``[1, 2]`` entry of ``forward('a', 'bc')``.

     With the hand-set pair probabilities below, that entry is expected
     to be 0.34 (compared with ``assertAlmostEqual``).
     """
     x_trie = Trie()
     for symbol in ('', 'a'):
         x_trie.add(symbol)
     y_trie = Trie()
     for symbol in ('b', 'c', 'bc', ''):
         y_trie.add(symbol)
     ep = EditProbability(Alphabet(x_trie), Alphabet(y_trie))
     hand_set = (
         (('a', ''), 0.5),
         (('', 'b'), 0.4),
         (('', 'c'), 0.3),
         (('a', 'b'), 0.2),
         (('', 'bc'), 0.1),
         (('a', 'bc'), 0.0),
         (('a', 'c'), 0.0),
     )
     for pair, value in hand_set:
         ep.probs[pair] = value
     table = ep.forward('a', 'bc')
     # NOTE(review): presumably [1, 2] is indexed by the lengths of 'a'
     # and 'bc' -- confirm against EditProbability.forward.
     self.assertAlmostEqual(table[1, 2], 0.34)
 def test_from_string(self):
     """Round-trip an EditProbability through ``repr`` and ``from_string``."""
     # NOTE(review): another method named test_from_string appears later
     # in this file; if both live in the same class the later definition
     # replaces this one -- confirm they belong to different classes.
     original = EditProbability(Alphabet('', 'a'), Alphabet('', 'b'))
     original.probs['a', ''] = 0.5
     serialized = repr(original)
     restored = EditProbability.from_string(serialized)
     self.assertCountEqual(original, restored)
 def test_init_reverse(self):
   """Check the items of ``reversed()`` for an Alphabet built from ``['ab']``."""
   alphabet = Alphabet(['ab'])
   flipped = alphabet.reversed()
   self.assertCountEqual(flipped, ['', 'a', 'b', 'ba'])
 def test_init_alph(self):
   """Build an Alphabet from positional symbols and check its items."""
   expected = ['', 'a']
   alphabet = Alphabet('a', '')
   self.assertCountEqual(alphabet, expected)
 def test_unique(self):
   """Check that a symbol passed twice appears once when iterating."""
   expected = ['a', 'b']
   alphabet = Alphabet('a', 'b', 'a')
   self.assertCountEqual(alphabet, expected)
 def test_len(self):
   """Check that ``len()`` of a two-symbol Alphabet is 2."""
   alphabet = Alphabet('a', 'b')
   size = len(alphabet)
   self.assertEqual(size, 2)
 def test_iter(self):
   """Check that iterating an Alphabet yields the symbols it was given."""
   expected = ['a', 'b']
   alphabet = Alphabet('a', 'b')
   self.assertCountEqual(alphabet, expected)
 def test_from_string(self):
   """Round-trip an Alphabet through ``repr`` and ``Alphabet.from_string``.

   The symbols include a single quote, a comma and the empty string.
   """
   # NOTE(review): another method named test_from_string appears earlier
   # in this file; if both live in the same class this definition
   # replaces the earlier one -- confirm they belong to different classes.
   original = Alphabet('a', 'b', '\'', ',', '')
   serialized = repr(original)
   restored = Alphabet.from_string(serialized)
   self.assertCountEqual(original, restored)
 def test_repr(self):
   """Check the exact ``repr`` text of an Alphabet.

   The symbols include a single quote, a comma and the empty string.
   """
   expected = 'Alphabet(\'\',\"\'\",\',\',\'a\',\'b\')'
   alphabet = Alphabet('a', 'b', '\'', ',', '')
   self.assertEqual(repr(alphabet), expected)
 def test_init_limit(self):
   """Check item lengths of an Alphabet built with ``unigram_limit=2``.

   Built from ``['abcdefg']``, the items are expected to have lengths
   0, 1, 1, 2 and 2 (in any order).
   """
   alphabet = Alphabet(['abcdefg'], unigram_limit=2)
   item_lengths = list(map(len, alphabet))
   self.assertCountEqual(item_lengths, [0, 1, 1, 2, 2])
 def test_init(self):
   """Check the items of an Alphabet built from the word list ``['a_']``."""
   expected = ['', 'a', '_', 'a_']
   alphabet = Alphabet(['a_'])
   self.assertCountEqual(alphabet, expected)
 def test_init_trie(self):
   """Build an Alphabet from a Trie holding ``'ab'`` and check its items."""
   trie = Trie()
   trie.add('ab')
   alphabet = Alphabet(trie)
   expected = ['ab']
   self.assertCountEqual(alphabet, expected)