Esempio n. 1
0
 def test_tokenize_text_positive02(self):
     """Check `tokenize_text` with `lowercase=True`.

     The input mixes Latin letters, Cyrillic words and a digit, separated
     by runs of spaces, tabs, newlines and a carriage return. Every
     expected token is in lower case.
     """
     raw_text = 'a\t B  c Мама мыла \n\r раму 1\n'
     actual_tokens = Seq2SeqLSTM.tokenize_text(raw_text, lowercase=True)
     expected_tokens = ['a', 'b', 'c', 'мама', 'мыла', 'раму', '1']
     self.assertEqual(actual_tokens, expected_tokens)
Esempio n. 2
0
 def test_tokenize_text_positive01(self):
     """Check `tokenize_text` with `lowercase=False`.

     The input mixes Latin letters, Cyrillic words and a digit, separated
     by runs of spaces, tabs, newlines and a carriage return. Every
     expected token keeps the letter case it has in the input.
     """
     raw_text = 'a\t B  c Мама мыла \n\r раму 1\n'
     actual_tokens = Seq2SeqLSTM.tokenize_text(raw_text, lowercase=False)
     expected_tokens = ['a', 'B', 'c', 'Мама', 'мыла', 'раму', '1']
     self.assertEqual(actual_tokens, expected_tokens)