예제 #1
0
 def test_pos_unigram_greek(self):
     """Unigram-tag a Greek sentence and check that the result is truthy."""
     sentence = "θεοὺς μὲν αἰτῶ τῶνδ᾽ ἀπαλλαγὴν πόνων φρουρᾶς ἐτείας μῆκος"  # pylint: disable=line-too-long
     result = POSTag("grc").tag_unigram(sentence)
     self.assertTrue(result)
예제 #2
0
 def test_pos_unigram_old_english(self):
     """Unigram-tag an Old English sentence and check that the result is truthy."""
     sentence = 'Hwæt! We Gardena in geardagum, þeodcyninga, þrym gefrunon, hu ða æþelingas ellen fremedon.'  # pylint: disable=line-too-long
     result = POSTag('old_english').tag_unigram(sentence)
     self.assertTrue(result)
예제 #3
0
파일: views.py 프로젝트: cltk/cltk_api
    def post(self):
        """Tag the submitted string and return its word/tag pairs.

        The ``string``, ``lang`` and ``method`` arguments are parsed with a
        ``RequestParser`` (also stored on ``self.reqparse``). When ``method``
        is not listed in ``POS_METHODS[lang]`` a ``message`` payload is
        returned instead of tags. Otherwise the result is
        ``{'tags': [{'word': ..., 'tag': ...}, ...]}``, where a ``None`` tag
        is replaced by the string ``'None'``.
        """
        parser = self.reqparse = reqparse.RequestParser()
        parser.add_argument('string', required=True)
        parser.add_argument('lang', required=True, choices=POS_METHODS.keys())
        parser.add_argument('method', required=False,
                            default=DEFAULT_POS_METHOD)

        args = parser.parse_args()
        string, lang, method = args['string'], args['lang'], args['method']

        # NOTE(review): this payload is returned without an explicit status
        # code -- confirm that callers expect that.
        if method not in POS_METHODS[lang]:
            return {'message': {'method': method + ' is not a valid choice'}}

        tagger = POSTag(lang)

        # Map each supported method name to the tagger attribute that
        # implements it. The attribute is looked up lazily so that only the
        # requested tagging method is ever touched.
        attribute_for_method = {
            'unigram': 'tag_unigram',
            'bigram': 'tag_bigram',
            'trigram': 'tag_trigram',
            'ngram123': 'tag_ngram_123_backoff',
            'tnt': 'tag_tnt',
        }
        attribute = attribute_for_method.get(method)
        # A method that passed validation but has no tagger mapped here
        # produces an empty tag list.
        tagged = getattr(tagger, attribute)(string) if attribute is not None else []

        return {'tags': [{'word': word, 'tag': 'None' if tag is None else tag}
                         for word, tag in tagged]}
예제 #4
0
 def test_middle_high_german_unigram_pos_tagger(self):
     """Unigram-tag a Middle High German line and compare with the expected pairs."""
     expected = [
         ('uns', 'PPER'),
         ('ist', 'VAFIN'),
         ('in', 'APPR'),
         ('alten', 'ADJA'),
         ('mæren', 'ADJA'),
         ('wunders', 'NA'),
         ('vil', 'ADJA'),
         ('geseit', 'VVPP'),
     ]
     sentence = "uns ist in alten mæren wunders vil geseit"
     actual = POSTag("middle_high_german").tag_unigram(sentence)
     self.assertEqual(expected, actual)
예제 #5
0
 def test_pos_unigram_middle_high_german(self):
     """Unigram-tag a Middle High German line and compare with the expected pairs."""
     words = ("uns", "ist", "in", "alten", "mæren", "wunders", "vil", "geseit")
     labels = ("PPER", "VAFIN", "APPR", "ADJA", "ADJA", "NA", "ADJA", "VVPP")
     # Pair each word with its expected tag, in sentence order.
     expected = list(zip(words, labels))
     actual = POSTag("gmh").tag_unigram(
         "uns ist in alten mæren wunders vil geseit"
     )
     self.assertEqual(expected, actual)
예제 #6
0
    def post(self):
        """Run the requested POS tagger over the submitted string.

        Parses ``string``, ``lang`` and ``method`` with a ``RequestParser``
        (kept on ``self.reqparse``). A ``method`` missing from
        ``POS_METHODS[lang]`` produces a ``message`` payload. Otherwise the
        return value is ``{'tags': [...]}`` with one ``word``/``tag`` dict per
        tagged token; a ``None`` tag is reported as the string ``'None'``.
        """
        parser = self.reqparse = reqparse.RequestParser()
        parser.add_argument('string', required=True)
        parser.add_argument('lang', required=True, choices=POS_METHODS.keys())
        parser.add_argument('method', required=False, default=DEFAULT_POS_METHOD)

        args = parser.parse_args()
        string = args['string']
        lang = args['lang']
        method = args['method']

        # NOTE(review): this payload is returned without an explicit status
        # code -- confirm that callers expect that.
        if method not in POS_METHODS[lang]:
            return {'message': {'method': method + ' is not a valid choice'}}

        tagger = POSTag(lang)

        # Method name -> tagger attribute name. The lookup is by name so that
        # only the selected tagging method is resolved on the tagger.
        tagger_attributes = {
            'unigram': 'tag_unigram',
            'bigram': 'tag_bigram',
            'trigram': 'tag_trigram',
            'ngram123': 'tag_ngram_123_backoff',
            'tnt': 'tag_tnt',
        }
        tagged = []
        if method in tagger_attributes:
            tagged = getattr(tagger, tagger_attributes[method])(string)

        tags = []
        for word, tag in tagged:
            tags.append({'word': word, 'tag': 'None' if tag is None else tag})
        return {'tags': tags}
예제 #7
0
 def test_pos_unigram_old_english(self):
     """Check that unigram tagging of an Old English sentence gives a truthy result."""
     text = 'Hwæt! We Gardena in geardagum, þeodcyninga, þrym gefrunon, hu ða æþelingas ellen fremedon.'  # pylint: disable=line-too-long
     old_english_tagger = POSTag('old_english')
     self.assertTrue(old_english_tagger.tag_unigram(text))
예제 #8
0
파일: test_tag.py 프로젝트: tnmsahu/cltk
 def test_pos_unigram_latin(self):
     """Unigram-tag a Latin sentence and check that the result is truthy."""
     sentence = 'Gallia est omnis divisa in partes tres'
     result = POSTag('latin').tag_unigram(sentence)
     self.assertTrue(result)
예제 #9
0
파일: test_tag.py 프로젝트: TylerKirby/cltk
 def test_pos_unigram_latin(self):
     """Check that unigram tagging of a Latin sentence gives a truthy result."""
     latin_tagger = POSTag('latin')
     self.assertTrue(
         latin_tagger.tag_unigram('Gallia est omnis divisa in partes tres')
     )
예제 #10
0
파일: test_tag.py 프로젝트: TylerKirby/cltk
 def test_pos_unigram_greek(self):
     """Check that unigram tagging of a Greek sentence gives a truthy result."""
     greek_tagger = POSTag('greek')
     text = 'θεοὺς μὲν αἰτῶ τῶνδ᾽ ἀπαλλαγὴν πόνων φρουρᾶς ἐτείας μῆκος'  # pylint: disable=line-too-long
     self.assertTrue(greek_tagger.tag_unigram(text))
예제 #11
0
 def getPos(self):
   """Return the Latin unigram tagger's output for ``self.text``."""
   return POSTag('latin').tag_unigram(self.text)