def test_subword_tokenize(self):
    """Exercise ``subword_tokenize`` with the default, "tcc" and "etcc" engines.

    Checks that ``None`` and ``""`` give an empty list, that non-empty
    input gives a ``list``, and that the lone character "า" is not an
    element of the result.
    """
    # Empty input, default engine.
    self.assertEqual(subword_tokenize(None), [])
    self.assertEqual(subword_tokenize(""), [])

    # "tcc" engine.
    self.assertIsInstance(
        subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"), list
    )
    # assertNotIn prints the token list on failure, unlike assertFalse(x in y).
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"))

    # "etcc" engine.
    self.assertEqual(subword_tokenize(None, engine="etcc"), [])
    self.assertEqual(subword_tokenize("", engine="etcc"), [])
    # NOTE(review): this input contains "ดิี", unlike the text used in the
    # other checks; presumably deliberate malformed input -- confirm.
    self.assertIsInstance(
        subword_tokenize("สวัสดิีดาวอังคาร", engine="etcc"), list
    )
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="etcc"))
    self.assertIsInstance(subword_tokenize("เบียร์สิงห์", engine="etcc"), list)
def test_subword_tokenize(self):
    """Exercise ``subword_tokenize`` with the default, "tcc" and "etcc" engines.

    Checks that ``None`` and ``""`` give an empty list, that non-empty
    input gives a ``list``, that the lone character "า" is not an element
    of the result, that ``keep_whitespace=False`` leaves no " " token, and
    that an unknown engine name raises ``ValueError``.
    """
    # Empty input, default engine.
    self.assertEqual(subword_tokenize(None), [])
    self.assertEqual(subword_tokenize(""), [])

    # "tcc" engine.
    self.assertIsInstance(
        subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"), list
    )
    # assertNotIn prints the token list on failure, unlike assertFalse(x in y).
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"))

    # "etcc" engine.
    self.assertEqual(subword_tokenize(None, engine="etcc"), [])
    self.assertEqual(subword_tokenize("", engine="etcc"), [])
    # NOTE(review): this input contains "ดิี", unlike the text used in the
    # other checks; presumably deliberate malformed input -- confirm.
    self.assertIsInstance(
        subword_tokenize("สวัสดิีดาวอังคาร", engine="etcc"), list
    )
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="etcc"))
    self.assertIsInstance(subword_tokenize("โควิด19", engine="etcc"), list)

    # Whitespace handling with the default engine.
    self.assertNotIn(
        " ", subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
    )

    with self.assertRaises(ValueError):
        subword_tokenize("นกแก้ว", engine="XX")  # engine does not exist
def test_subword_tokenize(self):
    """Smoke-test ``subword_tokenize`` for empty input and two engines.

    ``None`` and ``""`` must give an empty list; the "tcc" and "etcc"
    engines must return something other than ``None`` for a sample text.
    """
    for empty_input in (None, ""):
        self.assertEqual(subword_tokenize(empty_input), [])

    sample = "สวัสดีดาวอังคาร"
    for engine_name in ("tcc", "etcc"):
        self.assertIsNotNone(subword_tokenize(sample, engine=engine_name))
def test_subword_tokenize(self):
    """Exercise ``subword_tokenize`` with the default, "tcc", "etcc" and "ssg" engines.

    Checks that ``None`` and ``""`` give an empty list, that non-empty
    input does not give ``None``, that the lone character "า" is not an
    element of the result, and that "ssg" yields "ดาว" as a token.
    """
    # Empty input, default engine.
    self.assertEqual(subword_tokenize(None), [])
    self.assertEqual(subword_tokenize(""), [])

    # "tcc" engine.
    self.assertIsNotNone(subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"))
    # assertNotIn prints the token list on failure, unlike assertFalse(x in y).
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"))

    # "etcc" engine.
    self.assertEqual(subword_tokenize(None, engine="etcc"), [])
    self.assertEqual(subword_tokenize("", engine="etcc"), [])
    # NOTE(review): this input contains "ดิี", unlike the text used in the
    # other checks; presumably deliberate malformed input -- confirm.
    self.assertIsNotNone(
        subword_tokenize("สวัสดิีดาวอังคาร", engine="etcc")
    )
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="etcc"))
    self.assertIsNotNone(subword_tokenize("เบียร์สิงห์", engine="etcc"))

    # "ssg" engine.
    self.assertEqual(subword_tokenize(None, engine="ssg"), [])
    self.assertEqual(subword_tokenize("", engine="ssg"), [])
    self.assertIn("ดาว", subword_tokenize("สวัสดีดาวอังคาร", engine="ssg"))
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="ssg"))
def test_subword_tokenize(self):
    """Exercise ``subword_tokenize`` across every engine used in this test.

    Engines covered: default, "tcc", "etcc", "wangchanberta", "dict",
    "ssg" and "tltk". The checks are that ``None`` and ``""`` give an
    empty list, that non-empty input gives a ``list``, that the lone
    character "า" is not an element of the result, that
    ``keep_whitespace=False`` leaves no " " token, and that an unknown
    engine name raises ``ValueError``. "dict" and "ssg" are also checked
    against exact expected segmentations.
    """
    # Empty input, default engine.
    self.assertEqual(subword_tokenize(None), [])
    self.assertEqual(subword_tokenize(""), [])

    # "tcc" engine.
    self.assertIsInstance(
        subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"), list
    )
    # assertNotIn prints the token list on failure, unlike assertFalse(x in y).
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"))

    # "etcc" engine.
    self.assertEqual(subword_tokenize(None, engine="etcc"), [])
    self.assertEqual(subword_tokenize("", engine="etcc"), [])
    # NOTE(review): the "สวัสดิี..." inputs below contain "ดิี", unlike the
    # text used in the other checks; presumably deliberate malformed
    # input -- confirm.
    self.assertIsInstance(
        subword_tokenize("สวัสดิีดาวอังคาร", engine="etcc"), list
    )
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="etcc"))
    self.assertIsInstance(subword_tokenize("โควิด19", engine="etcc"), list)

    # "wangchanberta" engine.
    self.assertEqual(subword_tokenize(None, engine="wangchanberta"), [])
    self.assertEqual(subword_tokenize("", engine="wangchanberta"), [])
    self.assertIsInstance(
        subword_tokenize("สวัสดิีดาวอังคาร", engine="wangchanberta"), list
    )
    self.assertNotIn(
        "า", subword_tokenize("สวัสดีดาวอังคาร", engine="wangchanberta")
    )
    self.assertIsInstance(
        subword_tokenize("โควิด19", engine="wangchanberta"), list
    )

    # Whitespace handling with the default engine.
    self.assertNotIn(
        " ", subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
    )

    # "dict" engine.
    self.assertEqual(
        subword_tokenize("สวัสดีชาวโลก", engine="dict"),
        ["สวัส", "ดี", "ชาว", "โลก"],
    )
    self.assertNotIn("า", subword_tokenize("สวัสดีชาวโลก", engine="dict"))

    # "ssg" engine.
    self.assertEqual(subword_tokenize(None, engine="ssg"), [])
    # This check previously called syllable_tokenize, which left
    # subword_tokenize("", engine="ssg") untested.
    self.assertEqual(subword_tokenize("", engine="ssg"), [])
    self.assertEqual(
        subword_tokenize("แมวกินปลา", engine="ssg"), ["แมว", "กิน", "ปลา"]
    )
    self.assertIn("ดาว", subword_tokenize("สวัสดีดาวอังคาร", engine="ssg"))
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="ssg"))
    # NOTE(review): exact repeat of the default-engine whitespace check
    # above; possibly meant to pass engine="ssg" -- confirm.
    self.assertNotIn(
        " ", subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
    )

    # "tltk" engine.
    self.assertEqual(subword_tokenize(None, engine="tltk"), [])
    self.assertEqual(subword_tokenize("", engine="tltk"), [])
    self.assertIsInstance(
        subword_tokenize("สวัสดิีดาวอังคาร", engine="tltk"), list
    )
    self.assertNotIn("า", subword_tokenize("สวัสดีดาวอังคาร", engine="tltk"))
    self.assertIsInstance(subword_tokenize("โควิด19", engine="tltk"), list)

    with self.assertRaises(ValueError):
        subword_tokenize("นกแก้ว", engine="XX")  # engine does not exist
def test_subword_tokenize(self):
    """Smoke-test ``subword_tokenize`` with its default settings.

    In this variant ``None`` and ``""`` are expected to give an empty
    string, and a sample text must give something other than ``None``.
    """
    expected_empty = ""

    result_for_none = subword_tokenize(None)
    self.assertEqual(result_for_none, expected_empty)

    result_for_blank = subword_tokenize("")
    self.assertEqual(result_for_blank, expected_empty)

    result_for_sample = subword_tokenize("สวัสดีดาวอังคาร")
    self.assertIsNotNone(result_for_sample)
def test_subword_tokenize(self):
    """Smoke-test ``subword_tokenize`` for empty input and two engines.

    ``None`` and ``""`` must give an empty list; the "tcc" and "etcc"
    engines must return something other than ``None`` for a sample text.
    """
    no_tokens = []
    self.assertEqual(subword_tokenize(None), no_tokens)
    self.assertEqual(subword_tokenize(""), no_tokens)

    sample = "สวัสดีดาวอังคาร"

    tcc_result = subword_tokenize(sample, engine="tcc")
    self.assertIsNotNone(tcc_result)

    etcc_result = subword_tokenize(sample, engine="etcc")
    self.assertIsNotNone(etcc_result)