def test_pos_tag(self):
    """Check ``pos_tag``, ``unigram.tag``, ``perceptron.tag`` and ``pos_tag_sents``.

    Covers empty/``None`` input for each entry point, exact tag sequences
    for a few short token lists, and the ``orchid``, ``orchid_ud`` and
    ``pud`` corpora.
    """
    tokens = ["ผม", "รัก", "คุณ"]

    # pos_tag with default arguments: empty input and exact tag output.
    self.assertEqual(pos_tag(None), [])
    self.assertEqual(pos_tag([]), [])
    self.assertEqual(
        pos_tag(["นักเรียน", "ถาม", "ครู"]),
        [("นักเรียน", "NCMN"), ("ถาม", "VACT"), ("ครู", "NCMN")],
    )
    # One (word, tag) pair is expected per input token.
    self.assertEqual(
        len(pos_tag(["การ", "เดินทาง", "มี", "ความ", "ท้าทาย"])), 5
    )

    # Unigram tagger: empty input for each corpus, then real tokens.
    self.assertEqual(unigram.tag(None, corpus="pud"), [])
    self.assertEqual(unigram.tag([], corpus="pud"), [])
    self.assertEqual(unigram.tag(None, corpus="orchid"), [])
    self.assertEqual(unigram.tag([], corpus="orchid"), [])
    self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="orchid"))
    self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="pud"))
    self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="pud"))
    self.assertEqual(
        pos_tag(["คุณ", "กำลัง", "ประชุม"], engine="unigram"),
        [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")],
    )

    # These used assertTrue(tag, "NOUN"), where "NOUN" is only the failure
    # message, so any non-empty tag passed. assertEqual really compares.
    self.assertEqual(
        pos_tag(["การ", "รัฐประหาร"], corpus="orchid_ud")[0][1], "NOUN"
    )
    self.assertEqual(
        pos_tag(["ความ", "พอเพียง"], corpus="orchid_ud")[0][1], "NOUN"
    )

    # Perceptron tagger: empty input for each corpus, then real tokens.
    self.assertEqual(perceptron.tag(None, corpus="orchid"), [])
    self.assertEqual(perceptron.tag([], corpus="orchid"), [])
    self.assertEqual(perceptron.tag(None, corpus="orchid_ud"), [])
    self.assertEqual(perceptron.tag([], corpus="orchid_ud"), [])
    self.assertEqual(perceptron.tag(None, corpus="pud"), [])
    self.assertEqual(perceptron.tag([], corpus="pud"), [])
    self.assertIsNotNone(
        pos_tag(tokens, engine="perceptron", corpus="orchid")
    )
    # NOTE(review): this was a verbatim repeat of the "orchid" check above;
    # it now covers "orchid_ud" so all three corpora are exercised with
    # real tokens. Confirm that was the intent.
    self.assertIsNotNone(
        pos_tag(tokens, engine="perceptron", corpus="orchid_ud")
    )
    self.assertIsNotNone(pos_tag(tokens, engine="perceptron", corpus="pud"))

    # Sentence-level tagging.
    self.assertEqual(pos_tag_sents(None), [])
    self.assertEqual(pos_tag_sents([]), [])
    self.assertEqual(
        pos_tag_sents([["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]]),
        [
            [("ผม", "PPRS"), ("กิน", "VACT"), ("ข้าว", "NCMN")],
            [("แมว", "NCMN"), ("วิ่ง", "VACT")],
        ],
    )
def word_postag(comments):
    """Run ``pos_tag_sents`` with the ``pud`` corpus over every item of ``comments``.

    Each item is handed to ``pos_tag_sents`` unchanged, so it is
    presumably a list of tokenised sentences (verify against the caller).

    Returns a list holding one ``pos_tag_sents`` result per item, in input
    order, and prints a completion message once all items are processed.
    """
    tagged_comments = [
        pos_tag_sents(comment, corpus='pud') for comment in comments
    ]
    print('Word postag by pythai Done!')
    return tagged_comments
def test_pos_tag(self):
    """Check ``pos_tag`` with the unigram, perceptron and artagger engines.

    Also checks that ``unigram.tag``, ``perceptron.tag`` and
    ``pos_tag_sents`` return an empty list for ``None`` or empty input.
    """
    tokens = ["ผม", "รัก", "คุณ"]
    sentence = "คุณกำลังประชุม"
    sentence_tags = [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")]

    for nothing in (None, []):
        self.assertEqual(pos_tag(nothing), [])

    # Unigram engine.
    for corpus in ("pud", "orchid"):
        for nothing in (None, []):
            self.assertEqual(unigram.tag(nothing, corpus=corpus), [])
    for corpus in ("orchid", "pud"):
        self.assertIsNotNone(
            pos_tag(tokens, engine="unigram", corpus=corpus)
        )
    self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="pud"))
    self.assertEqual(
        pos_tag(word_tokenize(sentence), engine="unigram"), sentence_tags
    )

    # Perceptron engine.
    for corpus in ("orchid", "pud"):
        self.assertIsNotNone(
            pos_tag(tokens, engine="perceptron", corpus=corpus)
        )
    for corpus in ("pud", "orchid"):
        for nothing in (None, []):
            self.assertEqual(perceptron.tag(nothing, corpus=corpus), [])

    # artagger engine.
    for words in (None, [], tokens):
        self.assertIsNotNone(pos_tag(words, engine="artagger"))
    self.assertEqual(
        pos_tag(word_tokenize(sentence), engine="artagger"), sentence_tags
    )

    # Sentence-level tagging.
    for nothing in (None, []):
        self.assertEqual(pos_tag_sents(nothing), [])
    self.assertEqual(
        pos_tag_sents([["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]]),
        [
            [("ผม", "PPRS"), ("กิน", "VACT"), ("ข้าว", "NCMN")],
            [("แมว", "NCMN"), ("วิ่ง", "VACT")],
        ],
    )
def test_pos_tag(self):
    """Verify tagging results across the unigram, perceptron and artagger engines.

    Empty and ``None`` inputs must give ``[]`` from ``pos_tag``,
    ``unigram.tag``, ``perceptron.tag`` and ``pos_tag_sents``; short known
    inputs must give the exact tag sequences listed below.
    """
    tokens = ["ผม", "รัก", "คุณ"]
    text = "คุณกำลังประชุม"
    text_tags = [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")]
    sents = [["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]]
    sents_tags = [
        [("ผม", "PPRS"), ("กิน", "VACT"), ("ข้าว", "NCMN")],
        [("แมว", "NCMN"), ("วิ่ง", "VACT")],
    ]

    self.assertEqual(pos_tag(None), [])
    self.assertEqual(pos_tag([]), [])

    # (tagger, corpus) pairs checked with None and then [] as input.
    for tagger, corpus in ((unigram, "pud"), (unigram, "orchid")):
        self.assertEqual(tagger.tag(None, corpus=corpus), [])
        self.assertEqual(tagger.tag([], corpus=corpus), [])

    # (words, corpus) pairs that only need to produce some result.
    for words, corpus in ((tokens, "orchid"), (tokens, "pud"), ([""], "pud")):
        self.assertIsNotNone(pos_tag(words, engine="unigram", corpus=corpus))
    self.assertEqual(pos_tag(word_tokenize(text), engine="unigram"), text_tags)

    for corpus in ("orchid", "pud"):
        self.assertIsNotNone(
            pos_tag(tokens, engine="perceptron", corpus=corpus)
        )
    for tagger, corpus in ((perceptron, "pud"), (perceptron, "orchid")):
        self.assertEqual(tagger.tag(None, corpus=corpus), [])
        self.assertEqual(tagger.tag([], corpus=corpus), [])

    self.assertIsNotNone(pos_tag(None, engine="artagger"))
    self.assertIsNotNone(pos_tag([], engine="artagger"))
    self.assertIsNotNone(pos_tag(tokens, engine="artagger"))
    self.assertEqual(pos_tag(word_tokenize(text), engine="artagger"), text_tags)

    self.assertEqual(pos_tag_sents(None), [])
    self.assertEqual(pos_tag_sents([]), [])
    self.assertEqual(pos_tag_sents(sents), sents_tags)
def test_tag(self):
    """Check ``pos_tag`` with the ``old`` and ``artagger`` engines and ``pos_tag_sents``.

    The first two assertions pin exact tag sequences; the last only checks
    that the ``artagger`` engine returns a list.
    """
    self.assertEqual(
        pos_tag(word_tokenize("คุณกำลังประชุม"), engine="old"),
        [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")],
    )
    self.assertEqual(
        pos_tag_sents([["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]]),
        [
            [("ผม", "PPRS"), ("กิน", "VACT"), ("ข้าว", "NCMN")],
            [("แมว", "NCMN"), ("วิ่ง", "VACT")],
        ],
    )
    # The artagger check only runs on Python 3.4 and newer.
    if sys.version_info >= (3, 4):
        # assertIsInstance replaces comparing str(type(...)) with the
        # literal "<class 'list'>", which depended on the repr of the type.
        self.assertIsInstance(
            pos_tag(word_tokenize("ผมรักคุณ"), engine="artagger"), list
        )
def test_tag(self):
    """Check tagging via the ``old`` engine, ``pos_tag_sents`` and ``artagger``.

    Exact tag sequences are asserted for the ``old`` engine and for
    ``pos_tag_sents``; for ``artagger`` only the result type is asserted.
    """
    expected_old = [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")]
    self.assertEqual(
        pos_tag(word_tokenize("คุณกำลังประชุม"), engine="old"),
        expected_old,
    )

    expected_sents = [
        [("ผม", "PPRS"), ("กิน", "VACT"), ("ข้าว", "NCMN")],
        [("แมว", "NCMN"), ("วิ่ง", "VACT")],
    ]
    self.assertEqual(
        pos_tag_sents([["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]]),
        expected_sents,
    )

    # The artagger check is skipped on interpreters older than Python 3.4.
    if sys.version_info >= (3, 4):
        # Check the type directly rather than matching the string form of
        # type(...) against "<class 'list'>".
        result = pos_tag(word_tokenize("ผมรักคุณ"), engine="artagger")
        self.assertIsInstance(result, list)