Ejemplo n.º 1
0
 def test_pos_bigram_old_english(self):
     """Bigram-tag an Old English sentence and assert the result is truthy."""
     sentence = 'Hwæt! We Gardena in geardagum, þeodcyninga, þrym gefrunon, hu ða æþelingas ellen fremedon.'
     result = POSTag('old_english').tag_bigram(sentence)
     self.assertTrue(result)
Ejemplo n.º 2
0
 def test_pos_bigram_greek(self):
     """Bigram-tag a Greek line and assert the result is truthy."""
     line = "θεοὺς μὲν αἰτῶ τῶνδ᾽ ἀπαλλαγὴν πόνων φρουρᾶς ἐτείας μῆκος"  # pylint: disable=line-too-long
     result = POSTag("grc").tag_bigram(line)
     self.assertTrue(result)
Ejemplo n.º 3
0
    def post(self):
        """Tag the submitted string with the requested POS tagging method.

        Parses the ``string``, ``lang`` and ``method`` request arguments
        (``method`` falls back to ``DEFAULT_POS_METHOD``).

        Returns:
            ``{'tags': [{'word': ..., 'tag': ...}, ...]}`` on success, where
            a tag of ``None`` is reported as the string ``'None'``; or a
            ``{'message': {'method': ...}}`` dict when ``method`` is not
            listed in ``POS_METHODS[lang]``.
        """
        parser = reqparse.RequestParser()
        self.reqparse = parser
        parser.add_argument('string', required=True)
        parser.add_argument('lang', required=True, choices=POS_METHODS.keys())
        parser.add_argument('method', required=False,
                            default=DEFAULT_POS_METHOD)

        args = parser.parse_args()
        string, lang, method = args['string'], args['lang'], args['method']

        if method not in POS_METHODS[lang]:
            return {'message': {'method': method + ' is not a valid choice'}}

        tagger = POSTag(lang)
        # Map each method keyword to the tagger method name; the attribute
        # is only looked up for the method that was actually requested.
        tagger_method_names = {
            'unigram': 'tag_unigram',
            'bigram': 'tag_bigram',
            'trigram': 'tag_trigram',
            'ngram123': 'tag_ngram_123_backoff',
            'tnt': 'tag_tnt',
        }
        method_name = tagger_method_names.get(method)
        # An allowed method with no known tagger call yields no tags.
        tagged = [] if method_name is None else getattr(tagger, method_name)(string)

        return {'tags': [{'word': word, 'tag': 'None' if tag is None else tag}
                         for word, tag in tagged]}
Ejemplo n.º 4
0
 def test_middle_high_german_bigram_pos_tagger(self):
     """Bigram-tag a Middle High German sentence and compare to expected pairs."""
     expected = [
         ('uns', 'PPER'),
         ('ist', 'VAFIN'),
         ('in', 'APPR'),
         ('alten', 'ADJA'),
         ('mæren', 'NA'),
         ('wunders', 'NA'),
         ('vil', None),
         ('geseit', None),
     ]
     sentence = "uns ist in alten mæren wunders vil geseit"
     actual = POSTag("middle_high_german").tag_bigram(sentence)
     self.assertEqual(expected, actual)
Ejemplo n.º 5
0
 def test_pos_bigram_middle_high_german(self):
     """Check the bigram tagger output for a Middle High German sentence."""
     sentence = "uns ist in alten mæren wunders vil geseit"
     # The last two words are expected to come back with no tag (None).
     expected = [
         ("uns", "PPER"), ("ist", "VAFIN"), ("in", "APPR"), ("alten", "ADJA"),
         ("mæren", "NA"), ("wunders", "NA"), ("vil", None), ("geseit", None),
     ]
     actual = POSTag("gmh").tag_bigram(sentence)
     self.assertEqual(expected, actual)
Ejemplo n.º 6
0
    def post(self):
        """Run the requested POS tagger over the posted string.

        Request arguments ``string`` and ``lang`` are required; ``method``
        is optional and defaults to ``DEFAULT_POS_METHOD``.

        Returns:
            A dict with a ``'tags'`` list of ``{'word', 'tag'}`` entries
            (a ``None`` tag is emitted as the string ``'None'``), or a
            ``'message'`` dict when ``method`` is not among
            ``POS_METHODS[lang]``.
        """
        request_parser = reqparse.RequestParser()
        self.reqparse = request_parser
        request_parser.add_argument('string', required=True)
        request_parser.add_argument(
            'lang', required=True, choices=POS_METHODS.keys())
        request_parser.add_argument(
            'method', required=False, default=DEFAULT_POS_METHOD)

        args = request_parser.parse_args()
        string = args['string']
        lang = args['lang']
        method = args['method']

        if method not in POS_METHODS[lang]:
            return {'message': {'method': method + ' is not a valid choice'}}

        tagger = POSTag(lang)
        # Method keyword -> name of the tagger method that implements it.
        dispatch = {
            'unigram': 'tag_unigram',
            'bigram': 'tag_bigram',
            'trigram': 'tag_trigram',
            'ngram123': 'tag_ngram_123_backoff',
            'tnt': 'tag_tnt',
        }
        tagged = []
        handler_name = dispatch.get(method)
        if handler_name is not None:
            tagged = getattr(tagger, handler_name)(string)

        tags = []
        for word, tag in tagged:
            # Missing tags are reported as the literal string 'None'.
            tags.append({'word': word, 'tag': tag if tag is not None else 'None'})
        return {'tags': tags}
Ejemplo n.º 7
0
 def test_pos_bigram_old_english(self):
     """Assert that bigram tagging of an Old English sentence gives a truthy result."""
     text = 'Hwæt! We Gardena in geardagum, þeodcyninga, þrym gefrunon, hu ða æþelingas ellen fremedon.'
     old_english_tagger = POSTag('old_english')
     self.assertTrue(old_english_tagger.tag_bigram(text))
Ejemplo n.º 8
0
 def test_pos_bigram_latin(self):
     """Bigram-tag a Latin sentence and assert the result is truthy."""
     sentence = 'Gallia est omnis divisa in partes tres'
     result = POSTag('latin').tag_bigram(sentence)
     self.assertTrue(result)
Ejemplo n.º 9
0
 def test_pos_bigram_greek(self):
     """Assert that bigram tagging of a Greek line gives a truthy result."""
     line = 'θεοὺς μὲν αἰτῶ τῶνδ᾽ ἀπαλλαγὴν πόνων φρουρᾶς ἐτείας μῆκος'  # pylint: disable=line-too-long
     greek_tagger = POSTag('greek')
     self.assertTrue(greek_tagger.tag_bigram(line))
Ejemplo n.º 10
0
 def test_pos_bigram_latin(self):
     """Assert that bigram tagging of a Latin sentence gives a truthy result."""
     text = 'Gallia est omnis divisa in partes tres'
     latin_tagger = POSTag('latin')
     self.assertTrue(latin_tagger.tag_bigram(text))