def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False):
    """Initialise the tagger as an ``NgramTagger`` whose order is fixed to 1.

    Every argument is handed to ``NgramTagger.__init__`` unchanged; this
    constructor only pins the leading order argument to ``1``.
    """
    NgramTagger.__init__(
        self,
        1,
        train=train,
        model=model,
        backoff=backoff,
        cutoff=cutoff,
        verbose=verbose,
    )
def getNgramTaggerAccuracy(n, trainingSet, testingSet):
    """Train an ``NgramTagger`` of order ``n`` and return its accuracy.

    ``testingSet`` is read as a collection of sentences whose items are
    indexable tagged words: index 0 is the word, index 1 is the gold POS tag.
    The tagger is trained on ``trainingSet``, run over the untagged test
    sentences, and the predicted tags are scored against the gold tags with
    ``calculateAccuracy``.
    """

    def column(sentences, position):
        # Project every tagged word in every sentence onto a single field.
        return [[token[position] for token in sent] for sent in sentences]

    # Split the test data into plain words and the reference tags.
    plain_sentences = column(testingSet, 0)
    gold_tags = column(testingSet, 1)

    # Fit the tagger, then tag the untagged test sentences.
    tagger = NgramTagger(n, trainingSet)
    predicted_tags = column(tagger.tag_sents(plain_sentences), 1)

    return calculateAccuracy(gold_tags, predicted_tags)
def test_ngram_taggers(self):
    """Check that a 4-level backoff chain survives an encode/decode round trip.

    Builds unigram -> bigram -> trigram -> 4-gram taggers over ``self.corpus``
    (each backing off to the previous one, the unigram tagger backing off to
    ``self.default_tagger``), round-trips the outermost tagger through
    ``self.encoder`` / ``self.decoder``, and compares ``repr`` at every level
    of the decoded backoff chain.
    """
    unitagger = UnigramTagger(self.corpus, backoff=self.default_tagger)
    bitagger = BigramTagger(self.corpus, backoff=unitagger)
    tritagger = TrigramTagger(self.corpus, backoff=bitagger)
    ntagger = NgramTagger(4, self.corpus, backoff=tritagger)

    decoded = self.decoder.decode(self.encoder.encode(ntagger))

    # Outermost tagger first, then step down one backoff level per check.
    current = decoded
    self.assertEqual(repr(ntagger), repr(current))
    for expected in (tritagger, bitagger, unitagger, self.default_tagger):
        current = current.backoff
        self.assertEqual(repr(expected), repr(current))
def __init__(self, *args, **kwargs):
    """Initialise the tagger as an ``NgramTagger`` whose order is fixed to 4.

    All positional and keyword arguments are passed through to
    ``NgramTagger.__init__`` after the fixed order argument.
    """
    NgramTagger.__init__(
        self,
        4,
        *args,
        **kwargs
    )
def ngram_tagger(n, train_data, backoff=None):
    """Build and return an ``NgramTagger`` of order ``n`` trained on ``train_data``.

    ``backoff`` is forwarded to the ``NgramTagger`` constructor as its
    ``backoff`` keyword argument.
    """
    return NgramTagger(n, train_data, backoff=backoff)