import unittest

from pythainlp.tokenize import multi_cut, word_tokenize
from pythainlp.util import dict_trie


class TestTokenizeMM(unittest.TestCase):
    def test_mm(self):
        # Empty or missing input yields an empty token list.
        self.assertEqual(multi_cut.segment(None), [])
        self.assertEqual(multi_cut.segment(""), [])

        # Segmentation against a custom dictionary trie.
        self.assertIsNotNone(multi_cut.segment("ตัด", dict_trie([""])))

        self.assertEqual(word_tokenize("", engine="mm"), [])

        # Thai running text is split into dictionary words.
        self.assertEqual(
            word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="mm"),
            ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
        )

        # Numbers, decimals, IP addresses, and currency amounts are kept
        # intact or split at sensible boundaries.
        self.assertEqual(word_tokenize("19...", engine="mm"), ["19", "..."])
        self.assertEqual(word_tokenize("19.", engine="mm"), ["19", "."])
        self.assertEqual(word_tokenize("19.84", engine="mm"), ["19.84"])
        self.assertEqual(
            word_tokenize("127.0.0.1", engine="mm"), ["127.0.0.1"]
        )
        self.assertEqual(
            word_tokenize("USD1,984.42", engine="mm"), ["USD", "1,984.42"]
        )

        self.assertIsNotNone(multi_cut.mmcut("ทดสอบ"))
        self.assertIsNotNone(
            multi_cut.find_all_segment("รถไฟฟ้ากรุงเทพมหานครBTS")
        )
        self.assertEqual(multi_cut.find_all_segment(None), [])


if __name__ == "__main__":
    unittest.main()
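# A minimal usage sketch (not part of the test suite; the output shown in the
# comments is illustrative and assumes PyThaiNLP's default word dictionary):
#
#   from pythainlp.tokenize import multi_cut
#
#   multi_cut.segment("ฉันรักภาษาไทย")
#   # -> ['ฉัน', 'รัก', 'ภาษาไทย']
#
#   # find_all_segment enumerates the possible dictionary-consistent
#   # segmentations; each candidate is one string with words joined by "/".
#   for candidate in multi_cut.find_all_segment("ฉันรักภาษาไทย"):
#       print(candidate)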