def __init__(self, **kwargs):
    """Keep the baseline tagger and build a Brill trainer around it.

    All keyword arguments are forwarded unchanged to the parent
    initialiser; the ``tagger`` entry is additionally read with
    ``kwargs["tagger"]`` and used as the trainer's initial tagger.
    """
    super().__init__(**kwargs)
    self.tagger = kwargs["tagger"]

    # Relative token offsets used for the single-feature templates.
    windows = ([-1], [1], [-2], [2], [-2, -1], [1, 2], [-3, -2, -1], [1, 2, 3])

    rule_templates = []
    for feature in (brill.Pos, brill.Word):
        # One template per window for this feature kind ...
        for offsets in windows:
            rule_templates.append(brill.Template(feature(offsets)))
        # ... then one template pairing the previous and the next position.
        rule_templates.append(brill.Template(feature([-1]), feature([1])))

    self.brillTrainer = BrillTaggerTrainer(
        self.tagger, rule_templates, deterministic=True
    )
def train_brill_tagger(initial_tagger, train_sents, **kwargs):
    """Train a Brill tagger on top of ``initial_tagger``.

    Args:
        initial_tagger: tagger handed to ``BrillTaggerTrainer`` as baseline.
        train_sents: training sentences passed to ``trainer.train``.
        **kwargs: forwarded unchanged to ``trainer.train``.

    Returns:
        The result of ``trainer.train(train_sents, **kwargs)``.
    """
    # Relative offsets for the single-feature templates.
    spans = [[-1], [1], [-2], [2], [-2, -1], [1, 2], [-3, -2, -1], [1, 2, 3]]

    # Tag-based templates first, then the word-based ones, each group
    # closed by a template pairing the previous and next position.
    pos_rules = [brill.Template(brill.Pos(span)) for span in spans]
    pos_rules.append(brill.Template(brill.Pos([-1]), brill.Pos([1])))
    word_rules = [brill.Template(brill.Word(span)) for span in spans]
    word_rules.append(brill.Template(brill.Word([-1]), brill.Word([1])))

    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, pos_rules + word_rules, deterministic=True
    )
    return trainer.train(train_sents, **kwargs)
def train_brill_tagger(initial_tagger, training, **kwargs):
    """Train a Brill tagger whose rules correct the output of a tagger.

    Args:
        initial_tagger: baseline tagger given to ``BrillTaggerTrainer``.
        training: training data passed to ``trainer.train``.
        **kwargs: forwarded unchanged to ``trainer.train``.

    Returns:
        The result of ``trainer.train(training, **kwargs)``.
    """

    def context_templates(feature):
        """Build the nine templates for one feature kind (Pos or Word)."""
        offsets_list = (
            [-1], [1], [-2], [2],
            [-2, -1], [1, 2],
            [-3, -2, -1], [1, 2, 3],
        )
        built = [brill.Template(feature(offsets)) for offsets in offsets_list]
        # Final template conditions on the previous and the next position.
        built.append(brill.Template(feature([-1]), feature([1])))
        return built

    # Left operand is evaluated first, so Pos templates are created before
    # the Word templates.
    templates = context_templates(brill.Pos) + context_templates(brill.Word)

    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, templates, deterministic=True
    )
    return trainer.train(training, **kwargs)
def get_brill_tagger(self):
    """Train a Brill tagger from the tagged sentences on disk and return it.

    Reads ``tagged_input_sentences.txt`` from the directory ``.`` using
    ``/`` as the word/tag separator, loads the baseline tagger from
    ``taggers/maxent_treebank_pos_tagger/english.pickle``, and trains with
    ``trace=3`` and ``max_rules=10``.

    Returns:
        The result of ``trainer.train(...)``.
    """
    corpus = TaggedCorpusReader('.', 'tagged_input_sentences.txt', sep="/")
    sentences = list(corpus.tagged_sents())
    baseline = load('taggers/maxent_treebank_pos_tagger/english.pickle')

    # Relative offsets for the single-feature templates.
    windows = ([-1], [1], [-2], [2], [-2, -1], [1, 2], [-3, -2, -1], [1, 2, 3])
    rule_templates = []
    for feature in (brill.Pos, brill.Word):
        rule_templates.extend(
            brill.Template(feature(offsets)) for offsets in windows
        )
        # Template pairing the previous and the next position.
        rule_templates.append(brill.Template(feature([-1]), feature([1])))

    trainer = BrillTaggerTrainer(baseline, templates=rule_templates, trace=3)
    return trainer.train(sentences, max_rules=10)
def train_brill_tagger(initial_tagger, train_sents, **kwargs):
    """Train a Brill tagger on ``train_sents`` with tracing switched on.

    Args:
        initial_tagger: baseline tagger given to ``BrillTaggerTrainer``.
        train_sents: training sentences passed to ``trainer.train``.
        **kwargs: forwarded unchanged to ``trainer.train``.

    Returns:
        The result of ``trainer.train(train_sents, **kwargs)``.
    """
    # Relative offsets for the single-feature templates.
    windows = ([-1], [1], [-2], [2], [-2, -1], [1, 2], [-3, -2, -1], [1, 2, 3])

    templates = []
    for feature in (brill.Pos, brill.Word):
        templates.extend(brill.Template(feature(offsets)) for offsets in windows)
        # Template pairing the previous and the next position.
        templates.append(brill.Template(feature([-1]), feature([1])))

    # NOTE: the original author left nltkdemo18(), nltkdemo18plus(),
    # fntbl37() and brill24() as possible replacements for this list.
    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, templates, deterministic=True, trace=True
    )
    return trainer.train(train_sents, **kwargs)
def train(self, data):
    """Fit an HMM baseline on ``data`` and a Brill tagger on top of it.

    Sets ``self.baseline_tagger`` (the ``tagger`` attribute of the trained
    ``HMMTagger``), ``self.trainer`` (the ``BrillTaggerTrainer``) and
    ``self.tagger`` (the result of ``self.trainer.train(data)``).
    """
    # Baseline: an HMMTagger trained on the same data.
    baseline = HMMTagger()
    baseline.train(data)
    self.baseline_tagger = baseline.tagger

    # Rule templates for the Brill trainer; offsets are relative positions.
    windows = ([-1], [1], [-2], [2], [-2, -1], [1, 2], [-3, -2, -1], [1, 2, 3])
    rule_templates = []
    for feature in (brill.Pos, brill.Word):
        for offsets in windows:
            rule_templates.append(brill.Template(feature(offsets)))
        # Template pairing the previous and the next position.
        rule_templates.append(brill.Template(feature([-1]), feature([1])))

    self.trainer = BrillTaggerTrainer(
        self.baseline_tagger, templates=rule_templates
    )
    self.tagger = self.trainer.train(data)
def train_brill_tagger(initial_tagger, train_sents, **kwargs):
    """Train a Brill tagger that corrects the output of ``initial_tagger``.

    The templates below let the trainer learn rules that change the POS of
    a word depending on:

    * the POS of the previous word, of any of the two previous words, or
      of any of the three previous words (and likewise for following words);
    * the POS of the previous word and the POS of the next word;
    * the previous word, any of the two previous words, or any of the
      three previous words (and likewise for following words);
    * the previous word and the next word.

    Args:
        initial_tagger: the baseline tagger.
        train_sents: training sentences passed to ``trainer.train``.
        **kwargs: options forwarded to ``trainer.train``. ``max_rules``
            defaults to 100 and ``min_score`` to 2 when not supplied.

    Returns:
        The result of ``trainer.train``.
    """
    # Each Template generates candidate rules for the Brill tagger.
    templates = [
        brill.Template(brill.Pos([-1])),  # POS tag of the previous token
        brill.Template(brill.Pos([1])),  # POS tag of the next token
        brill.Template(brill.Pos([-2])),  # POS tag two positions back
        brill.Template(brill.Pos([2])),  # POS tag two positions ahead
        brill.Template(brill.Pos([-2, -1])),  # POS tag of any of the two previous tokens
        brill.Template(brill.Pos([1, 2])),
        brill.Template(brill.Pos([-3, -2, -1])),
        brill.Template(brill.Pos([1, 2, 3])),
        brill.Template(brill.Pos([-1]), brill.Pos([1])),
        brill.Template(brill.Word([-1])),
        brill.Template(brill.Word([1])),
        brill.Template(brill.Word([-2])),
        brill.Template(brill.Word([2])),
        brill.Template(brill.Word([-2, -1])),
        brill.Template(brill.Word([1, 2])),
        brill.Template(brill.Word([-3, -2, -1])),
        brill.Template(brill.Word([1, 2, 3])),
        brill.Template(brill.Word([-1]), brill.Word([1])),
    ]

    # BrillTaggerTrainer parameters, in order:
    #   1. initial_tagger (Tagger): the baseline tagger
    #   2. templates (list of templates): templates to be used in training
    #   3. trace (int): verbosity level
    #   4. deterministic (bool): if True, adjudicate ties deterministically
    #   5. ruleformat (str): format of reported rules
    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, templates, deterministic=True, trace=True
    )

    # Caller-supplied options override the defaults instead of being dropped.
    train_options = {"max_rules": 100, "min_score": 2}
    train_options.update(kwargs)
    return trainer.train(train_sents, **train_options)
def train_brill_tagger(initial_tagger, train_sents, **kwargs):
    """Build the standard template set and train a Brill tagger with it.

    Args:
        initial_tagger: baseline tagger given to ``BrillTaggerTrainer``.
        train_sents: training sentences passed to ``trainer.train``.
        **kwargs: forwarded unchanged to ``trainer.train``.

    Returns:
        The result of ``trainer.train(train_sents, **kwargs)``.
    """
    # Relative offsets; e.g. [-1] lets a rule be generated from the
    # previous position's part-of-speech tag (Pos) or word (Word).
    single_windows = (
        [-1], [1], [-2], [2],
        [-2, -1], [1, 2],
        [-3, -2, -1], [1, 2, 3],
    )

    templates = []
    for make_feature in (brill.Pos, brill.Word):
        templates += [
            brill.Template(make_feature(window)) for window in single_windows
        ]
        # Template pairing the previous and the next position.
        templates += [brill.Template(make_feature([-1]), make_feature([1]))]

    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, templates, deterministic=True
    )
    return trainer.train(train_sents, **kwargs)
bigram_tagger = nltk.BigramTagger(tagged_posts, backoff=unigram_tagger) tbuar_tagger = nltk.TrigramTagger(tagged_posts, backoff=bigram_tagger) return tbuar_tagger templates = [ brill.Template(brill.Pos([-1])), brill.Template(brill.Pos([1])), brill.Template(brill.Pos([-2])), brill.Template(brill.Pos([2])), brill.Template(brill.Pos([-2, -1])), brill.Template(brill.Pos([1, 2])), brill.Template(brill.Pos([-3, -2, -1])), brill.Template(brill.Pos([1, 2, 3])), brill.Template(brill.Pos([-1]), brill.Pos([1])), brill.Template(brill.Word([-1])), brill.Template(brill.Word([1])), brill.Template(brill.Word([-2])), brill.Template(brill.Word([2])), brill.Template(brill.Word([-2, -1])), brill.Template(brill.Word([1, 2])), brill.Template(brill.Word([-3, -2, -1])), brill.Template(brill.Word([1, 2, 3])), brill.Template(brill.Word([-1]), brill.Pos([1])), ] train_sents = nltk.corpus.nps_chat.tagged_posts() initial_tagger = create_backoff_tagger() trainer = brill_trainer.BrillTaggerTrainer(initial_tagger, templates, deterministic=True)
bigram_tagger = BigramTagger(training_data, backoff=unigram_tagger) print "Bigram accuracy: " print bigram_tagger.evaluate(evaulation_data) # Trigram tagger trigram_tagger = TrigramTagger(training_data, backoff=bigram_tagger) print "Trigram accuracy: " print trigram_tagger.evaluate(evaulation_data) # Brill tagger templates templates = [ Template(brill.Pos([1, 1])), Template(brill.Pos([2, 2])), Template(brill.Pos([1, 2])), Template(brill.Pos([1, 3])), Template(brill.Word([1, 1])), Template(brill.Word([2, 2])), Template(brill.Word([1, 2])), Template(brill.Word([1, 3])), Template(brill.Pos([-1, -1]), brill.Pos([1, 1])), Template(brill.Word([-1, -1]), brill.Word([1, 1])), ] # First iteration trainer = brill_trainer.BrillTaggerTrainer(trigram_tagger, templates) brill_tagger = trainer.train(training_data, max_rules, min_score) print "Initial Brill accuracy:" print brill_tagger.evaluate(evaulation_data) # 10 Folding for i in range(1, 5):
import nltk import nltk.tag from nltk.tag import brill from nltk.tag import UnigramTagger from nltk.tag import BrillTaggerTrainer templates = [ brill.Template(brill.Pos([1, 1])), brill.Template(brill.Pos([2, 2])), brill.Template(brill.Pos([1, 2])), brill.Template(brill.Pos([1, 3])), brill.Template(brill.Pos([1, 1])), brill.Template(brill.Pos([2, 2])), brill.Template(brill.Pos([1, 2])), brill.Template(brill.Pos([1, 3])), brill.Template(brill.Word([-1, -1])), brill.Template(brill.Word([-1, -1])) ] trainer_initial_pos = BrillTaggerTrainer(initial_tagger=custom_pos_tagger, templates=templates, trace=3, deterministic=True) brill_tagger = trainer_initial_pos.train(train_data, max_rules=10) # In[ ]: # In[94]: train_sentences = [ ('Total runs scored by SC Ganguly in match 5?', 'runs'),