def test_translit_xy_missing(self):
    """Transliterate x -> y when the input holds a symbol missing from x.

    The input ``'a_a'`` contains ``'_'``, which is not part of the source
    alphabet built here; the transliterator is expected to yield ``'bb'``.
    """
    source_alphabet = Alphabet('', 'a', 'aa')
    target_alphabet = Alphabet('', 'b', 'c', 'bc')
    # Every pair starts at probability 0.0; only two pairs are raised below.
    model = EditProbability(source_alphabet, target_alphabet,
                            lambda x, y: 0.0)
    model.probs['a', 'b'] = 0.1
    model.probs['aa', 'bc'] = 0.5

    transliterate = model.transliterator_x2y()
    result = transliterate('a_a')
    self.assertEqual(result, 'bb')
def test_translit_yx(self):
    """Transliterate in the y -> x direction.

    With the probabilities set below, the y string ``'bc'`` is expected to
    be mapped to the x string ``'aa'``.
    """
    source_alphabet = Alphabet('', 'a', 'aa')
    target_alphabet = Alphabet('', 'b', 'c', 'bc')
    # Every pair starts at probability 0.0; three pairs are raised below.
    model = EditProbability(source_alphabet, target_alphabet,
                            lambda x, y: 0.0)
    model.probs['a', 'c'] = 0.5
    model.probs['a', 'b'] = 0.5
    model.probs['', 'bc'] = 0.1

    transliterate = model.transliterator_y2x()
    result = transliterate('bc')
    self.assertEqual(result, 'aa')
def test_oov(self):
    """Score a pair whose y string uses a symbol absent from the y trie.

    The y trie only receives ``'b'`` and ``''``, so the ``'c'`` in ``'bc'``
    was never added; the score is expected to be (almost) 0.
    """
    source_trie = Trie()
    for symbol in ('', 'a'):
        source_trie.add(symbol)

    target_trie = Trie()
    for symbol in ('b', ''):
        target_trie.add(symbol)

    model = EditProbability(Alphabet(source_trie), Alphabet(target_trie))
    score = model.score('a', 'bc')
    self.assertAlmostEqual(score, 0)
def test_from_data(self):
    """Compare ``EditProbability.from_data`` with a hand-trained model.

    One model is built by ``from_data`` from a fixture file (3 lines,
    unigram limit 3, 3 iterations); the other is built from in-memory pairs
    and updated three times. Both are expected to hold the same items.
    """
    # NOTE(review): presumably the fixture file contains the same three
    # pairs as ``pairs`` below -- confirm against tests/files.
    from_file = EditProbability.from_data(
        'tests/files/ep_from_data.txt',
        max_lines=3,
        unigram_limit=3,
        iterations=3,
        init_func=lambda x, y: 0.5,
    )

    pairs = [('a', 'x'), ('ab', 'xy'), ('abc', 'xyz')]
    x_alphabet = Alphabet([pair[0] for pair in pairs])
    y_alphabet = Alphabet([pair[1] for pair in pairs])
    by_hand = EditProbability(x_alphabet, y_alphabet, lambda x, y: 0.5)
    for _ in range(3):
        by_hand.iterative_update(pairs)

    self.assertCountEqual(from_file, by_hand)
def test_score(self):
    """Check ``score('a', 'bc')`` against a hand-set probability table.

    With the pair probabilities assigned below, the expected score is 0.34.
    """
    source_trie = Trie()
    for symbol in ('', 'a'):
        source_trie.add(symbol)

    target_trie = Trie()
    for symbol in ('b', 'c', 'bc', ''):
        target_trie.add(symbol)

    model = EditProbability(Alphabet(source_trie), Alphabet(target_trie))

    # (x symbol, y symbol) -> probability, applied in this exact order.
    table = (
        (('a', ''), 0.5),
        (('', 'b'), 0.4),
        (('', 'c'), 0.3),
        (('a', 'b'), 0.2),
        (('', 'bc'), 0.1),
        (('a', 'bc'), 0.0),
        (('a', 'c'), 0.0),
    )
    for pair, probability in table:
        model.probs[pair] = probability

    self.assertAlmostEqual(model.score('a', 'bc'), 0.34)
def test_forward(self):
    """Check the ``forward('a', 'bc')`` result at index ``[1, 2]``.

    With the pair probabilities assigned below, that cell is expected to be
    0.34.
    """
    source_trie = Trie()
    for symbol in ('', 'a'):
        source_trie.add(symbol)

    target_trie = Trie()
    for symbol in ('b', 'c', 'bc', ''):
        target_trie.add(symbol)

    model = EditProbability(Alphabet(source_trie), Alphabet(target_trie))

    # (x symbol, y symbol) -> probability, applied in this exact order.
    table = (
        (('a', ''), 0.5),
        (('', 'b'), 0.4),
        (('', 'c'), 0.3),
        (('a', 'b'), 0.2),
        (('', 'bc'), 0.1),
        (('a', 'bc'), 0.0),
        (('a', 'c'), 0.0),
    )
    for pair, probability in table:
        model.probs[pair] = probability

    lattice = model.forward('a', 'bc')
    self.assertAlmostEqual(lattice[1, 2], 0.34)
def test_from_string(self):
    """Round-trip an ``EditProbability`` through ``repr`` / ``from_string``.

    The model rebuilt from its own ``repr`` is expected to hold the same
    items as the original.
    """
    original = EditProbability(Alphabet('', 'a'), Alphabet('', 'b'))
    original.probs['a', ''] = 0.5

    serialized = repr(original)
    restored = EditProbability.from_string(serialized)
    self.assertCountEqual(original, restored)
def test_init_reverse(self):
    """``reversed()`` of ``Alphabet(['ab'])`` should hold '', 'a', 'b', 'ba'."""
    alphabet = Alphabet(['ab'])
    expected = ['', 'a', 'b', 'ba']
    self.assertCountEqual(alphabet.reversed(), expected)
def test_init_alph(self):
    """Symbols passed as positional strings should become the alphabet."""
    alphabet = Alphabet('a', '')
    expected = ['', 'a']
    self.assertCountEqual(alphabet, expected)
def test_unique(self):
    """A symbol passed twice should appear only once in the alphabet."""
    alphabet = Alphabet('a', 'b', 'a')
    expected = ['a', 'b']
    self.assertCountEqual(alphabet, expected)
def test_len(self):
    """``len()`` of a two-symbol alphabet should be 2."""
    alphabet = Alphabet('a', 'b')
    size = len(alphabet)
    self.assertEqual(size, 2)
def test_iter(self):
    """Iterating an alphabet should yield exactly the symbols it was given."""
    alphabet = Alphabet('a', 'b')
    expected = ['a', 'b']
    self.assertCountEqual(alphabet, expected)
def test_from_string(self):
    """Round-trip an ``Alphabet`` through ``repr`` / ``from_string``.

    The symbols include a quote, a comma and the empty string; the rebuilt
    alphabet is expected to hold the same items as the original.
    """
    original = Alphabet('a','b','\'',',','')
    serialized = repr(original)
    restored = Alphabet.from_string(serialized)
    self.assertCountEqual(original, restored)
def test_repr(self):
    """``repr`` of an alphabet should match the exact expected text.

    The symbols include a quote, a comma and the empty string.
    """
    alphabet = Alphabet('a','b','\'',',','')
    expected = 'Alphabet(\'\',\"\'\",\',\',\'a\',\'b\')'
    self.assertEqual(repr(alphabet), expected)
def test_init_limit(self):
    """Build from ``['abcdefg']`` with ``unigram_limit=2``.

    The resulting symbols are expected to have lengths 0, 1, 1, 2 and 2
    (in any order).
    """
    alphabet = Alphabet(['abcdefg'], unigram_limit=2)
    symbol_lengths = [len(symbol) for symbol in alphabet]
    expected = [0, 1, 1, 2, 2]
    self.assertCountEqual(symbol_lengths, expected)
def test_init(self):
    """``Alphabet(['a_'])`` should hold '', 'a', '_' and 'a_'."""
    alphabet = Alphabet(['a_'])
    expected = ['', 'a', '_', 'a_']
    self.assertCountEqual(alphabet, expected)
def test_init_trie(self):
    """An alphabet built from a trie should hold only the trie's entries.

    The trie receives just ``'ab'``, and the alphabet is expected to hold
    exactly ``'ab'``.
    """
    trie = Trie()
    trie.add('ab')
    alphabet = Alphabet(trie)
    expected = ['ab']
    self.assertCountEqual(alphabet, expected)