예제 #1
0
    def test_subword_tokenize(self):
        """Exercise subword_tokenize with the default, "tcc" and "etcc" engines.

        Empty input (None or "") must produce an empty list, non-empty input
        must produce a list, and the lone character "า" must not appear as a
        token for the sample text.
        """
        text = "สวัสดีดาวอังคาร"

        # Default engine: empty input yields no tokens.
        self.assertEqual(subword_tokenize(None), [])
        self.assertEqual(subword_tokenize(""), [])

        self.assertIsInstance(subword_tokenize(text, engine="tcc"), list)
        # assertNotIn (rather than assertFalse on an `in` expression) shows
        # the actual token list when the check fails.
        self.assertNotIn("า", subword_tokenize(text, engine="tcc"))

        self.assertEqual(subword_tokenize(None, engine="etcc"), [])
        self.assertEqual(subword_tokenize("", engine="etcc"), [])
        # NOTE(review): this input differs from `text` (extra vowel mark after
        # "ด") -- presumably a deliberately malformed word; confirm.
        self.assertIsInstance(
            subword_tokenize("สวัสดิีดาวอังคาร", engine="etcc"), list)
        self.assertNotIn("า", subword_tokenize(text, engine="etcc"))
        self.assertIsInstance(
            subword_tokenize("เบียร์สิงห์", engine="etcc"), list)
 def test_subword_tokenize(self):
     """Exercise subword_tokenize with the default, "tcc" and "etcc" engines.

     Also checks that keep_whitespace=False leaves no " " token and that an
     unknown engine name raises ValueError.
     """
     text = "สวัสดีดาวอังคาร"

     # Default engine: empty input yields no tokens.
     self.assertEqual(subword_tokenize(None), [])
     self.assertEqual(subword_tokenize(""), [])

     self.assertIsInstance(subword_tokenize(text, engine="tcc"), list)
     # assertNotIn (rather than assertFalse on an `in` expression) shows
     # the actual token list when the check fails.
     self.assertNotIn("า", subword_tokenize(text, engine="tcc"))

     self.assertEqual(subword_tokenize(None, engine="etcc"), [])
     self.assertEqual(subword_tokenize("", engine="etcc"), [])
     # NOTE(review): this input differs from `text` (extra vowel mark after
     # "ด") -- presumably a deliberately malformed word; confirm.
     self.assertIsInstance(
         subword_tokenize("สวัสดิีดาวอังคาร", engine="etcc"), list)
     self.assertNotIn("า", subword_tokenize(text, engine="etcc"))
     self.assertIsInstance(subword_tokenize("โควิด19", engine="etcc"), list)

     # With keep_whitespace=False no bare space may be returned as a token.
     self.assertNotIn(
         " ", subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False))

     with self.assertRaises(ValueError):
         subword_tokenize("นกแก้ว", engine="XX")  # engine does not exist
예제 #3
0
파일: __init__.py 프로젝트: xemoe/pythainlp
 def test_subword_tokenize(self):
     """Check empty-input handling and the "tcc" / "etcc" engines."""
     # None and "" must both come back as an empty list.
     for blank in (None, ""):
         self.assertEqual(subword_tokenize(blank), [])

     sample = "สวัสดีดาวอังคาร"
     # Each engine must return something other than None for real text.
     for engine_name in ("tcc", "etcc"):
         tokens = subword_tokenize(sample, engine=engine_name)
         self.assertIsNotNone(tokens)
예제 #4
0
    def test_subword_tokenize(self):
        """Exercise subword_tokenize with the "tcc", "etcc" and "ssg" engines.

        Empty input (None or "") must produce an empty list, non-empty input
        must produce a non-None result, and the lone character "า" must not
        appear as a token for the sample text.
        """
        text = "สวัสดีดาวอังคาร"

        # Default engine: empty input yields no tokens.
        self.assertEqual(subword_tokenize(None), [])
        self.assertEqual(subword_tokenize(""), [])

        self.assertIsNotNone(subword_tokenize(text, engine="tcc"))
        # assertIn / assertNotIn (rather than assertTrue / assertFalse on an
        # `in` expression) show the actual result when the check fails.
        self.assertNotIn("า", subword_tokenize(text, engine="tcc"))

        self.assertEqual(subword_tokenize(None, engine="etcc"), [])
        self.assertEqual(subword_tokenize("", engine="etcc"), [])
        # NOTE(review): this input differs from `text` (extra vowel mark after
        # "ด") -- presumably a deliberately malformed word; confirm.
        self.assertIsNotNone(
            subword_tokenize("สวัสดิีดาวอังคาร", engine="etcc")
        )
        self.assertNotIn("า", subword_tokenize(text, engine="etcc"))
        self.assertIsNotNone(subword_tokenize("เบียร์สิงห์", engine="etcc"))

        self.assertEqual(subword_tokenize(None, engine="ssg"), [])
        self.assertEqual(subword_tokenize("", engine="ssg"), [])
        # "ssg" is expected to keep "ดาว" together as one token.
        self.assertIn("ดาว", subword_tokenize(text, engine="ssg"))
        self.assertNotIn("า", subword_tokenize(text, engine="ssg"))
예제 #5
0
 def test_subword_tokenize(self):
     """Exercise subword_tokenize across every engine used in this test.

     Covers the default engine plus "tcc", "etcc", "wangchanberta", "dict",
     "ssg" and "tltk": empty input (None or "") must produce an empty list,
     non-empty input must produce a list (or the exact expected tokens
     where given), and the lone character "า" must not appear as a token.
     Also checks that keep_whitespace=False leaves no " " token and that an
     unknown engine name raises ValueError.
     """
     text = "สวัสดีดาวอังคาร"
     # NOTE(review): differs from `text` (extra vowel mark after "ด") --
     # presumably a deliberately malformed word; confirm.
     odd_text = "สวัสดิีดาวอังคาร"
     mixed_text = "โควิด19"
     spaced_text = "พันธมิตร ชา นม"

     # Default engine: empty input yields no tokens.
     self.assertEqual(subword_tokenize(None), [])
     self.assertEqual(subword_tokenize(""), [])

     # --- tcc ---
     self.assertIsInstance(subword_tokenize(text, engine="tcc"), list)
     # assertIn / assertNotIn (rather than assertTrue / assertFalse on an
     # `in` expression) show the actual token list when the check fails.
     self.assertNotIn("า", subword_tokenize(text, engine="tcc"))

     # --- etcc ---
     self.assertEqual(subword_tokenize(None, engine="etcc"), [])
     self.assertEqual(subword_tokenize("", engine="etcc"), [])
     self.assertIsInstance(subword_tokenize(odd_text, engine="etcc"), list)
     self.assertNotIn("า", subword_tokenize(text, engine="etcc"))
     self.assertIsInstance(
         subword_tokenize(mixed_text, engine="etcc"), list
     )

     # --- wangchanberta ---
     self.assertEqual(subword_tokenize(None, engine="wangchanberta"), [])
     self.assertEqual(subword_tokenize("", engine="wangchanberta"), [])
     self.assertIsInstance(
         subword_tokenize(odd_text, engine="wangchanberta"), list
     )
     self.assertNotIn("า", subword_tokenize(text, engine="wangchanberta"))
     self.assertIsInstance(
         subword_tokenize(mixed_text, engine="wangchanberta"), list
     )

     # With keep_whitespace=False no bare space may be returned as a token.
     self.assertNotIn(
         " ", subword_tokenize(spaced_text, keep_whitespace=False)
     )

     # --- dict ---
     self.assertEqual(
         subword_tokenize("สวัสดีชาวโลก", engine="dict"),
         ["สวัส", "ดี", "ชาว", "โลก"],
     )
     self.assertNotIn("า", subword_tokenize("สวัสดีชาวโลก", engine="dict"))

     # --- ssg ---
     self.assertEqual(subword_tokenize(None, engine="ssg"), [])
     # Previously called syllable_tokenize here; this test targets
     # subword_tokenize, so the empty-string case must go through it too.
     self.assertEqual(subword_tokenize("", engine="ssg"), [])
     self.assertEqual(
         subword_tokenize("แมวกินปลา", engine="ssg"), ["แมว", "กิน", "ปลา"]
     )
     self.assertIn("ดาว", subword_tokenize(text, engine="ssg"))
     self.assertNotIn("า", subword_tokenize(text, engine="ssg"))
     # NOTE(review): identical to the whitespace check above (no engine
     # argument) -- possibly meant to pass engine="ssg"; confirm.
     self.assertNotIn(
         " ", subword_tokenize(spaced_text, keep_whitespace=False)
     )

     # --- tltk ---
     self.assertEqual(subword_tokenize(None, engine="tltk"), [])
     self.assertEqual(subword_tokenize("", engine="tltk"), [])
     self.assertIsInstance(subword_tokenize(odd_text, engine="tltk"), list)
     self.assertNotIn("า", subword_tokenize(text, engine="tltk"))
     self.assertIsInstance(
         subword_tokenize(mixed_text, engine="tltk"), list
     )

     with self.assertRaises(ValueError):
         subword_tokenize("นกแก้ว", engine="XX")  # engine does not exist
예제 #6
0
 def test_subword_tokenize(self):
     """Check that empty input gives "" and real text gives a non-None result."""
     expected_for_blank = ""
     # None and "" are both expected to come back as the empty string.
     for blank in (None, ""):
         self.assertEqual(subword_tokenize(blank), expected_for_blank)

     result = subword_tokenize("สวัสดีดาวอังคาร")
     self.assertIsNotNone(result)
예제 #7
0
 def test_subword_tokenize(self):
     """Check empty-input handling and the "tcc" / "etcc" engines."""
     # None and "" must both come back as an empty list.
     for blank in (None, ""):
         self.assertEqual(subword_tokenize(blank), [])

     sample = "สวัสดีดาวอังคาร"
     # Each engine must return something other than None for real text.
     for engine_name in ("tcc", "etcc"):
         tokens = subword_tokenize(sample, engine=engine_name)
         self.assertIsNotNone(tokens)