def test_pos_trigram_greek(self):
    """Check that the Greek ("grc") trigram POS tagger returns a truthy result."""
    sentence = "θεοὺς μὲν αἰτῶ τῶνδ᾽ ἀπαλλαγὴν πόνων φρουρᾶς ἐτείας μῆκος"  # pylint: disable=line-too-long
    result = POSTag("grc").tag_trigram(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def test_pos_ngram123_tagger_greek(self):
    """Check that the Greek ("grc") 1-, 2-, 3-gram backoff POS tagger returns a truthy result."""
    sentence = "θεοὺς μὲν αἰτῶ τῶνδ᾽ ἀπαλλαγὴν πόνων φρουρᾶς ἐτείας μῆκος"  # pylint: disable=line-too-long
    result = POSTag("grc").tag_ngram_123_backoff(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def test_pos_ngram123_tagger_old_english(self):
    """Check that the Old English ("ang") 1-, 2-, 3-gram backoff POS tagger returns a truthy result."""
    sentence = "Hwæt! We Gardena in geardagum, þeodcyninga, þrym gefrunon, hu ða æþelingas ellen fremedon."  # pylint: disable=line-too-long
    result = POSTag("ang").tag_ngram_123_backoff(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def test_pos_perceptron_tagger_old_english(self):
    """Check that the Old English ("ang") Perceptron POS tagger returns a truthy result."""
    sentence = "Hwæt! We Gardena in geardagum, þeodcyninga, þrym gefrunon, hu ða æþelingas ellen fremedon."  # pylint: disable=line-too-long
    result = POSTag("ang").tag_perceptron(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def test_pos_ngram12_tagger_middle_low_german(self):
    """Check that the Middle Low German ("gml") ngram-12 backoff POS tagger returns a truthy result."""
    sentence = "Jck Johannes preister verwarer vnde voirs tender des Juncfrouwen kloisters to Mariendale"  # pylint: disable=line-too-long
    result = POSTag("gml").tag_ngram_12_backoff(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def test_pos_trigram_old_english(self):
    """Check that the Old English ("ang") trigram POS tagger returns a truthy result."""
    sentence = "Hwæt! We Gardena in geardagum, þeodcyninga, þrym gefrunon, hu ða æþelingas ellen fremedon."  # pylint: disable=line-too-long
    result = POSTag("ang").tag_trigram(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def test_pos_ngram123_tagger_latin(self):
    """Check that the Latin ("lat") 1-, 2-, 3-gram backoff POS tagger returns a truthy result."""
    sentence = "Gallia est omnis divisa in partes tres"
    result = POSTag("lat").tag_ngram_123_backoff(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def _retrieve_tag(self, text):
    """Run the configured Latin POS tagger on ``text`` and return (word, tag) pairs.

    The text is lower-cased before tagging. Only the first two items of each
    tagger result are kept.
    Tag format: [('word', 'tag')]

    :param text: string
    :return: list of tuples, with each tuple containing the word and its pos tag;
        ``None`` when ``self.tagger`` is not one of the tagger names handled below
    :rtype : list
    """
    chosen = self.tagger

    # Select the bound tagging method first; the POSTag object is only
    # constructed once a supported tagger name has been recognised.
    if chosen == "tag_ngram_123_backoff":
        # Data format: Perseus Style (see https://github.com/cltk/latin_treebank_perseus)
        run_tagger = POSTag("lat").tag_ngram_123_backoff
    elif chosen == "tag_tnt":
        run_tagger = POSTag("lat").tag_tnt
    elif chosen == "tag_crf":
        run_tagger = POSTag("lat").tag_crf
    else:
        # Unrecognised tagger name: no tagging is performed.
        return None

    return [(entry[0], entry[1]) for entry in run_tagger(text.lower())]
def test_pos_tnt_tagger_old_norse(self):
    """Check that the Old Norse ("non") TnT POS tagger returns a truthy result."""
    tagger = POSTag("non")
    tagged = tagger.tag_tnt("Hlióðs bið ek allar.")
    # The leftover debugging print of the tagged output was removed so the
    # test no longer writes to stdout; the assertion alone decides pass/fail.
    self.assertTrue(tagged)
def test_pos_crf_tagger_latin(self):
    """Check that the Latin ("lat") CRF POS tagger returns a truthy result."""
    sentence = "Gallia est omnis divisa in partes tres"
    result = POSTag("lat").tag_crf(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def test_pos_trigram_latin(self):
    """Check that the Latin ("lat") trigram POS tagger returns a truthy result."""
    sentence = "Gallia est omnis divisa in partes tres"
    result = POSTag("lat").tag_trigram(sentence)
    # Only truthiness is asserted; the exact tags are not pinned here.
    self.assertTrue(result)
def __init__(self):
    """Create the syllabifier, transcriber and POS tagger held by this instance."""
    # Syllabifier selected through the "old_norse_ipa" language setting.
    self.syllabifier = Syllabifier(language="old_norse_ipa")

    # Transcriber built from the diphthong/IPA tables and the rule set that
    # are defined outside this method.
    transcriber_args = (
        DIPHTHONGS_IPA,
        DIPHTHONGS_IPA_class,
        IPA_class,
        old_norse_rules,
    )
    self.tr = Transcriber(*transcriber_args)

    # NOTE(review): other POSTag call sites pass short codes such as "non",
    # "lat" or "grc"; confirm that "old_norse" is an accepted language key.
    self.tagger = POSTag("old_norse")