def train(self, data):
    """Fit an HMM baseline on ``data`` and learn Brill rules on top of it.

    Sets ``self.baseline_tagger`` (the ``tagger`` attribute of the trained
    ``HMMTagger``), ``self.trainer`` and ``self.tagger``.

    :param data: training data; passed unchanged to ``HMMTagger.train`` and
        to ``BrillTaggerTrainer.train``.
    """
    # Baseline tagger: an HMM trained on the same data.
    baseline = HMMTagger()
    baseline.train(data)
    self.baseline_tagger = baseline.tagger

    # Context windows (offsets relative to the token being corrected).
    context_windows = (
        [-1], [1], [-2], [2],
        [-2, -1], [1, 2],
        [-3, -2, -1], [1, 2, 3],
    )
    # Same templates, in the same creation order, for tags first and then
    # for words; each group ends with the "previous AND next" template.
    rule_templates = []
    for feature in (brill.Pos, brill.Word):
        for window in context_windows:
            rule_templates.append(brill.Template(feature(window)))
        rule_templates.append(brill.Template(feature([-1]), feature([1])))

    self.trainer = BrillTaggerTrainer(self.baseline_tagger, templates=rule_templates)
    self.tagger = self.trainer.train(data)
def get_brill_tagger(self):
    """Train and return a Brill tagger from ``tagged_input_sentences.txt``.

    The corpus is read from the current directory with ``/`` as the
    word/tag separator. The initial tagger is the pickled model loaded from
    ``taggers/maxent_treebank_pos_tagger/english.pickle``. Training runs
    with ``trace=3`` and is capped at 10 rules.

    :returns: the object returned by ``BrillTaggerTrainer.train``.
    """
    corpus = TaggedCorpusReader('.', 'tagged_input_sentences.txt', sep="/")
    tagged_sentences = list(corpus.tagged_sents())
    base_tagger = load('taggers/maxent_treebank_pos_tagger/english.pickle')

    # Offsets (relative to the target token) used by the single-feature templates.
    windows = [
        [-1], [1], [-2], [2],
        [-2, -1], [1, 2],
        [-3, -2, -1], [1, 2, 3],
    ]
    # Tag-context templates first, then word-context templates; each group
    # is followed by its "previous AND next" template.
    rule_templates = (
        [brill.Template(brill.Pos(w)) for w in windows]
        + [brill.Template(brill.Pos([-1]), brill.Pos([1]))]
        + [brill.Template(brill.Word(w)) for w in windows]
        + [brill.Template(brill.Word([-1]), brill.Word([1]))]
    )

    rule_learner = BrillTaggerTrainer(base_tagger, templates=rule_templates, trace=3)
    return rule_learner.train(tagged_sentences, max_rules=10)
def __init__(self, **kwargs):
    """Set up a deterministic Brill trainer around the supplied tagger.

    :param kwargs: forwarded unchanged to the parent initializer; must
        contain ``"tagger"``, which is used as the initial tagger
        (``KeyError`` otherwise).
    """
    super().__init__(**kwargs)
    self.tagger = kwargs["tagger"]

    # Offsets relative to the token whose tag is being corrected.
    context_windows = (
        [-1], [1], [-2], [2],
        [-2, -1], [1, 2],
        [-3, -2, -1], [1, 2, 3],
    )
    # Tag-based templates are created first, then word-based ones; each
    # group is closed by the combined "previous AND next" template.
    rule_templates = []
    for feature in (brill.Pos, brill.Word):
        rule_templates.extend(
            brill.Template(feature(window)) for window in context_windows
        )
        rule_templates.append(brill.Template(feature([-1]), feature([1])))

    self.brillTrainer = BrillTaggerTrainer(self.tagger, rule_templates, deterministic=True)
def nltk_brill_pos_tagger(input_dict):
    """Train a Brill transformational rule-based tagger.

    A Brill tagger lets an initial tagger assign a first tag sequence and
    then applies an ordered list of learned transformation rules to correct
    individual tags. Here the rules are learned with ``BrillTaggerTrainer``.

    Keys read from ``input_dict``:

    :param training_corpus: dict with ``'corpus'`` and ``'chunk'`` entries;
        both are handed to ``corpus_reader`` to obtain the training data.
    :param initial_tagger: dict whose ``'object'`` entry is the initial
        tagger; when falsy, ``DefaultTagger('-None-')`` is used instead.
    :param max_rules: maximum number of transformations to learn
        (converted with ``int``).
    :param min_score: minimum net error reduction each transformation must
        achieve (converted with ``int``).
    :param templates: name of a callable in ``nltk.tag.brill`` that returns
        the templates to train with.

    Training is always deterministic; ``settings.DEBUG`` is passed as the
    trainer's ``trace`` value and also enables printing of the learned rules.

    :returns pos_tagger: a dictionary holding the trained tagger under
        ``'object'`` and the method name ``'tag_sents'`` under ``'function'``.
    """
    corpus_spec = input_dict['training_corpus']
    chunk = corpus_spec['chunk']
    corpus = corpus_spec['corpus']
    training_corpus = corpus_reader(corpus, chunk)

    # Fall back to a constant-tag tagger when no initial tagger is supplied.
    if input_dict['initial_tagger']:
        initial_tagger = input_dict['initial_tagger']['object']
    else:
        initial_tagger = DefaultTagger('-None-')

    max_rules = int(input_dict['max_rules'])  # default 200
    min_score = int(input_dict['min_score'])  # default 2

    # The template set is looked up by name in nltk.tag.brill and instantiated.
    template_factory = getattr(nltk.tag.brill, input_dict['templates'])
    templates = template_factory()

    trainer = BrillTaggerTrainer(
        initial_tagger,
        templates,
        deterministic=True,
        trace=settings.DEBUG,
    )
    brill_tagger = trainer.train(
        training_corpus,
        max_rules=max_rules,
        min_score=min_score,
    )

    if settings.DEBUG:
        for rule in brill_tagger.rules():
            print(str(rule))

    pos_tagger = {'function': 'tag_sents', 'object': brill_tagger}
    return {'pos_tagger': pos_tagger}
def nltk_brill_pos_tagger(input_dict):
    """Train a Brill transformational rule-based tagger.

    A Brill tagger lets an initial tagger assign a first tag sequence and
    then applies an ordered list of learned transformation rules to correct
    individual tags. Here the rules are learned with ``BrillTaggerTrainer``.

    Keys read from ``input_dict``:

    :param training_corpus: passed to ``corpus_reader``; only the first
        1000 items of the result are used for training.
    :param initial_tagger: dict whose ``'object'`` entry is the initial
        tagger; when falsy, ``DefaultTagger('-None-')`` is used instead.
    :param max_rules: maximum number of transformations to learn
        (converted with ``int``).
    :param min_score: minimum net error reduction each transformation must
        achieve (converted with ``int``).
    :param templates: name of a callable in ``nltk.tag.brill`` that returns
        the templates to train with.

    Training is always deterministic; ``settings.DEBUG`` is passed as the
    trainer's ``trace`` value and also enables printing of the learned rules.

    :returns pos_tagger: a dictionary holding the trained tagger under
        ``'object'`` and the method name ``'tag_sents'`` under ``'function'``.
    """
    # Cap the training material at the first 1000 items.
    full_corpus = corpus_reader(input_dict['training_corpus'])
    training_corpus = full_corpus[:1000]

    # Fall back to a constant-tag tagger when no initial tagger is supplied.
    if input_dict['initial_tagger']:
        initial_tagger = input_dict['initial_tagger']['object']
    else:
        initial_tagger = DefaultTagger('-None-')

    max_rules = int(input_dict['max_rules'])  # default 200
    min_score = int(input_dict['min_score'])  # default 2

    # The template set is looked up by name in nltk.tag.brill and instantiated.
    template_factory = getattr(nltk.tag.brill, input_dict['templates'])
    templates = template_factory()

    trainer = BrillTaggerTrainer(
        initial_tagger,
        templates,
        deterministic=True,
        trace=settings.DEBUG,
    )
    brill_tagger = trainer.train(
        training_corpus,
        max_rules=max_rules,
        min_score=min_score,
    )

    if settings.DEBUG:
        for rule in brill_tagger.rules():
            print(str(rule))

    pos_tagger = {'function': 'tag_sents', 'object': brill_tagger}
    return {'pos_tagger': pos_tagger}
def train_brill_tagger(initial_tagger, train_sents, end, trace=0, **kwargs):
    """Train a Brill tagger with the ``fntbl37`` template set.

    :param initial_tagger: tagger that produces the initial tag sequence.
    :param train_sents: tagged training sentences, passed to
        ``BrillTaggerTrainer.train``.
    :param end: accepted for backward compatibility only; it is not used.
    :param trace: verbosity level forwarded to ``BrillTaggerTrainer``.
    :param kwargs: extra keyword arguments forwarded to
        ``BrillTaggerTrainer.train``.
    :returns: whatever ``BrillTaggerTrainer.train`` returns.
    """
    # fntbl37: the 37 templates taken from the postagging task of the
    # fntbl distribution (http://www.cs.jhu.edu/~rflorian/fntbl/).
    templates = brill.fntbl37()
    trainer = BrillTaggerTrainer(initial_tagger, templates, deterministic=True, trace=trace)
    return trainer.train(train_sents, **kwargs)
def __init__(self):
    """Build a deterministic Brill trainer and train ``self.tagger``.

    The initial tagger comes from ``get_initial_tagger()``, the templates
    from ``brill.fntbl37()`` and the data from
    ``utils.training_testing_dataset()``. Only the training half of that
    data is used; at most 20 rules are learned.
    """
    base_tagger = get_initial_tagger()
    templates = brill.fntbl37()
    self.trainer = BrillTaggerTrainer(
        base_tagger,
        templates,
        deterministic=True,
        trace=0,
    )

    # The helper yields a (train, test) pair; the test part is not needed here.
    train_sents, _held_out = utils.training_testing_dataset()
    self.tagger = self.trainer.train(train_sents, max_rules=20)
    print('Brill tagger training completed')
def get_braupt_tagger():
    """Train a Brill tagger over an affix/n-gram backoff chain on CoNLL-2000.

    The initial tagger is produced by ``backoff_tagger`` from the Affix,
    Unigram, Bigram and Trigram tagger classes (in that order), with a
    ``RegexpTagger`` built from ``get_word_patterns()`` as the final backoff.
    Brill training uses the ``brill24`` templates, ``max_rules=100`` and
    ``min_score=3``.

    :returns: the object returned by ``BrillTaggerTrainer.train``.
    """
    training_sents = nltk.corpus.conll2000.tagged_sents()
    patterns = get_word_patterns()

    tagger_classes = [
        nltk.tag.AffixTagger,
        nltk.tag.UnigramTagger,
        nltk.tag.BigramTagger,
        nltk.tag.TrigramTagger,
    ]
    regexp_fallback = nltk.tag.RegexpTagger(patterns)
    initial_tagger = backoff_tagger(training_sents, tagger_classes, backoff=regexp_fallback)

    rule_learner = BrillTaggerTrainer(initial_tagger, brill.brill24())
    return rule_learner.train(training_sents, max_rules=100, min_score=3)
def turkish_brill_tagger(self):
    """Train a Brill tagger over a unigram/bigram/trigram chain and dump it.

    An initial train/evaluation split is taken, the n-gram chain is built
    on it, and a Brill tagger is trained and scored. Then, four more times,
    the sentences are re-randomized, a new split is taken and the Brill
    tagger is retrained (reusing the same trainer) and scored. The tagger
    from the last round is passed to ``self.dump_tagger_to_file``.
    """
    train_data = self.set_train_set(self.get_cutoff())
    eval_data = self.set_evaluation_set(self.get_cutoff(), self.development_size)

    # Each n-gram tagger receives the previous one as its last argument.
    unigram = self.turkish_unigram_tagger(train_data, eval_data, self.re_tagger)
    bigram = self.turkish_bigram_tagger(train_data, eval_data, unigram)
    trigram = self.turkish_trigram_tagger(train_data, eval_data, bigram)

    # (start, end) spans given to each feature class, tags first, then words.
    spans = ((1, 1), (2, 2), (1, 2), (1, 3))
    templates = [
        Template(feature, span)
        for feature in (brill.Pos, brill.Word)
        for span in spans
    ]
    # NOTE(review): the same Pos template is registered twice here; the
    # second copy may have been meant to use brill.Word - confirm.
    templates.append(Template(brill.Pos, (-1, -1), (1, 1)))
    templates.append(Template(brill.Pos, (-1, -1), (1, 1)))

    br_trainer = BrillTaggerTrainer(trigram, templates)
    tr_brill_tagger = br_trainer.train(train_data, self.max_rules, self.min_score)
    print("TR initial Brill accuracy: %s" % tr_brill_tagger.evaluate(eval_data))

    for fold in range(1, 5):
        self.randomize_sentences()
        fold_train = self.set_train_set(self.get_cutoff())
        fold_eval = self.set_evaluation_set(self.get_cutoff(), self.development_size)
        print('Fold: %s' % fold)
        tr_brill_tagger = br_trainer.train(fold_train, self.max_rules, self.min_score)
        print("TR Brill accuracy: %s" % tr_brill_tagger.evaluate(fold_eval))

    self.dump_tagger_to_file(tr_brill_tagger)
class BrillTagger(object):
    """Brill tagger that is trained when the object is constructed."""

    def __init__(self):
        """Build a deterministic Brill trainer and train ``self.tagger``.

        The initial tagger comes from ``get_initial_tagger()``, the
        templates from ``brill.fntbl37()`` and the data from
        ``utils.training_testing_dataset()``. Only the training half of
        that data is used; at most 20 rules are learned.
        """
        base_tagger = get_initial_tagger()
        templates = brill.fntbl37()
        self.trainer = BrillTaggerTrainer(
            base_tagger,
            templates,
            deterministic=True,
            trace=0,
        )

        # The helper yields a (train, test) pair; the test part is unused.
        train_sents, _held_out = utils.training_testing_dataset()
        self.tagger = self.trainer.train(train_sents, max_rules=20)
        print('Brill tagger training completed')

    def tag(self, sent_tokens):
        """Tag every sentence in ``sent_tokens``.

        :param sent_tokens: iterable of sentences, each an iterable of tokens.
        :returns: list with one ``self.tagger.tag(...)`` result per
            sentence, in input order.
        """
        # Each sentence is copied into a fresh list before it is tagged.
        return [self.tagger.tag(list(sentence)) for sentence in sent_tokens]
class BrillTagger(Tagger):
    """Brill tagger that uses an HMM tagger as its baseline."""

    def train(self, data):
        """Fit an HMM baseline on ``data`` and learn Brill rules on top of it.

        Sets ``self.baseline_tagger`` (the ``tagger`` attribute of the
        trained ``HMMTagger``), ``self.trainer`` and ``self.tagger``.

        :param data: training data; passed unchanged to ``HMMTagger.train``
            and to ``BrillTaggerTrainer.train``.
        """
        # Baseline tagger: an HMM trained on the same data.
        baseline = HMMTagger()
        baseline.train(data)
        self.baseline_tagger = baseline.tagger

        # Offsets relative to the token whose tag is being corrected.
        context_windows = (
            [-1], [1], [-2], [2],
            [-2, -1], [1, 2],
            [-3, -2, -1], [1, 2, 3],
        )
        # Tag-based templates first, then word-based ones; each group ends
        # with the combined "previous AND next" template.
        rule_templates = []
        for feature in (brill.Pos, brill.Word):
            for window in context_windows:
                rule_templates.append(brill.Template(feature(window)))
            rule_templates.append(brill.Template(feature([-1]), feature([1])))

        self.trainer = BrillTaggerTrainer(self.baseline_tagger, templates=rule_templates)
        self.tagger = self.trainer.train(data)

    def test(self, test_data):
        """Log the baseline's accuracy and return the Brill tagger's score.

        :param test_data: data passed to both taggers' ``evaluate`` method.
        :returns: ``self.tagger.evaluate(test_data)``.
        """
        baseline_pct = self.baseline_tagger.evaluate(test_data) * 100.0
        logger.info("Baseline tagger accuracy: {:.2f}%".format(baseline_pct))
        return self.tagger.evaluate(test_data)
class Brill(NltkModel):
    """Brill tagger model built around an externally supplied initial tagger."""

    def __init__(self, **kwargs):
        """Set up a deterministic Brill trainer around the supplied tagger.

        :param kwargs: forwarded unchanged to the parent initializer; must
            contain ``"tagger"``, which is used as the initial tagger
            (``KeyError`` otherwise).
        """
        super().__init__(**kwargs)
        self.tagger = kwargs["tagger"]

        # Offsets relative to the token whose tag is being corrected.
        context_windows = (
            [-1], [1], [-2], [2],
            [-2, -1], [1, 2],
            [-3, -2, -1], [1, 2, 3],
        )
        # Tag-based templates first, then word-based ones; each group ends
        # with the combined "previous AND next" template.
        rule_templates = []
        for feature in (brill.Pos, brill.Word):
            rule_templates.extend(
                brill.Template(feature(window)) for window in context_windows
            )
            rule_templates.append(brill.Template(feature([-1]), feature([1])))

        self.brillTrainer = BrillTaggerTrainer(self.tagger, rule_templates, deterministic=True)

    def train(self, text):
        """Train on ``text`` and keep the result in ``self.trainedBrill``."""
        self.trainedBrill = self.brillTrainer.train(text)

    def test(self, testText):
        """Return ``self.trainedBrill.evaluate(testText)``.

        ``train`` must have been called first, since it is what sets
        ``self.trainedBrill``.
        """
        return self.trainedBrill.evaluate(testText)
td, vd = td_sents_by_code[code], vd_sents_by_code[code] hmm_fname = "%s_cv-%i_fold-%i_code-%s_stemed-%s.dill" % (hmm_model_prefix, CV_FOLDS, fold, code, str(STEM)) if os.path.exists(hmm_fname): with open(hmm_fname, "rb") as f: base_tagger = dill.load(f) else: hmm_trainer = HiddenMarkovModelTrainer() base_tagger = hmm_trainer.train_supervised(td) with open(hmm_fname, "wb") as f: dill.dump(base_tagger, f) #See: http://streamhacker.com/2008/12/03/part-of-speech-tagging-with-nltk-part-3/ #and http://streamhacker.com/2014/12/02/nltk-3/ for changes to interface trainer = BrillTaggerTrainer(base_tagger, templates, deterministic=True) model = trainer.train(td, max_rules=MAX_RULES, min_score=MIN_SCORE) code2model[code] = model wd_td_ys_bytag[code] = to_flattened_binary_tags(td) wd_vd_ys_bytag[code] = to_flattened_binary_tags(vd) td_predictions = model.tag_sents(to_sentences(td)) vd_predictions = model.tag_sents(to_sentences(vd)) td_wd_predictions_by_code[code] = to_flattened_binary_tags(td_predictions) vd_wd_predictions_by_code[code] = to_flattened_binary_tags(vd_predictions) merge_dictionaries(wd_td_ys_bytag, cv_wd_td_ys_by_tag) merge_dictionaries(wd_vd_ys_bytag, cv_wd_vd_ys_by_tag) merge_dictionaries(td_wd_predictions_by_code, cv_wd_td_predictions_by_tag)
def train(self, templates=None, verbose=True):
    """Train a new Brill tagger.

    A regexp -> unigram -> bigram -> trigram backoff chain is built on a
    shuffled split of ``self.tagged_data_list`` and used as the initial
    tagger. A Brill tagger is trained once on that split, then retrained
    ``self.num_groups`` more times on freshly shuffled splits.

    :param templates: Brill templates to learn from; defaults to
        ``brill.nltkdemo18()``.
    :param verbose: when truthy, evaluate each tagger on the held-out part
        of the current split and print its accuracy.
    """
    if templates is None:
        templates = brill.nltkdemo18()

    def split_data():
        # Reseed, shuffle in place, then cut into (training, test) parts.
        # NOTE(review): the seed is the same on every call, so each call
        # applies the same permutation to the already shuffled list.
        random.seed(len(self.tagged_data_list))
        random.shuffle(self.tagged_data_list)
        cutoff = int(self.dev_size * self.train_size)
        return (self.tagged_data_list[:cutoff],
                self.tagged_data_list[cutoff:self.dev_size])

    def report(label, tagger, held_out):
        # Evaluation only runs when output is requested. The single-argument
        # print(...) form emits the same text under Python 2 and Python 3.
        if verbose:
            print("{}:\n{}\n".format(label, tagger.evaluate(held_out)))

    training_data, test_data = split_data()

    # very simple regular expression tagger
    regex_tagger = RegexpTagger([(r'^-?[0-9]+(.[0-9]+)?$', 'PUNCT'),
                                 (r'.*', 'N')])
    report("Regular expression tagger accuracy", regex_tagger, test_data)

    # n-gram chain: each tagger backs off to the previous, simpler one
    unigram_tagger = UnigramTagger(train=training_data, backoff=regex_tagger)
    report("Unigram tagger accuracy", unigram_tagger, test_data)

    bigram_tagger = BigramTagger(train=training_data, backoff=unigram_tagger)
    report("Bigram tagger accuracy", bigram_tagger, test_data)

    trigram_tagger = TrigramTagger(train=training_data, backoff=bigram_tagger)
    report("Trigram tagger accuracy", trigram_tagger, test_data)

    # first iteration
    trainer = BrillTaggerTrainer(initial_tagger=trigram_tagger,
                                 templates=templates)
    brill_tagger = trainer.train(train_sents=training_data,
                                 max_rules=self.max_rules,
                                 min_score=self.min_score)
    report("Initial Brill tagger accuracy", brill_tagger, test_data)

    # folding
    # NOTE(review): the initial tagger is not retrained per fold, so a
    # fold's test data may overlap what the n-gram chain was trained on.
    # NOTE(review): the last brill_tagger is neither stored nor returned
    # in the code visible here - confirm that is intended.
    for fold in range(self.num_groups):
        training_data, test_data = split_data()
        # note that .train method returns a BrillTagger() object
        brill_tagger = trainer.train(train_sents=training_data,
                                     max_rules=self.max_rules,
                                     min_score=self.min_score)
        report("Brill tagger accuracy, fold {}".format(fold + 1),
               brill_tagger, test_data)
def train(self, templates=None, verbose=True):
    """Train a new Brill tagger.

    A regexp -> unigram -> bigram -> trigram backoff chain is built on a
    shuffled split of ``self.tagged_data_list`` and used as the initial
    tagger. A Brill tagger is trained once on that split, then retrained
    ``self.num_groups`` more times on freshly shuffled splits.

    :param templates: Brill templates to learn from; defaults to
        ``brill.nltkdemo18()``.
    :param verbose: when truthy, evaluate each tagger on the held-out part
        of the current split and print its accuracy.
    """
    if templates is None:
        templates = brill.nltkdemo18()

    def split_data():
        # Reseed, shuffle in place, then cut into (training, test) parts.
        # NOTE(review): the seed is the same on every call, so each call
        # applies the same permutation to the already shuffled list.
        random.seed(len(self.tagged_data_list))
        random.shuffle(self.tagged_data_list)
        cutoff = int(self.dev_size * self.train_size)
        return (self.tagged_data_list[:cutoff],
                self.tagged_data_list[cutoff:self.dev_size])

    def report(label, tagger, held_out):
        # Evaluation only runs when output is requested. The single-argument
        # print(...) form emits the same text under Python 2 and Python 3.
        if verbose:
            print("{}:\n{}\n".format(label, tagger.evaluate(held_out)))

    training_data, test_data = split_data()

    # very simple regular expression tagger
    regex_tagger = RegexpTagger([
        (r'^-?[0-9]+(.[0-9]+)?$', 'PUNCT'),
        (r'.*', 'N')
    ])
    report("Regular expression tagger accuracy", regex_tagger, test_data)

    # n-gram chain: each tagger backs off to the previous, simpler one
    unigram_tagger = UnigramTagger(train=training_data, backoff=regex_tagger)
    report("Unigram tagger accuracy", unigram_tagger, test_data)

    bigram_tagger = BigramTagger(train=training_data, backoff=unigram_tagger)
    report("Bigram tagger accuracy", bigram_tagger, test_data)

    trigram_tagger = TrigramTagger(train=training_data, backoff=bigram_tagger)
    report("Trigram tagger accuracy", trigram_tagger, test_data)

    # first iteration
    trainer = BrillTaggerTrainer(initial_tagger=trigram_tagger,
                                 templates=templates)
    brill_tagger = trainer.train(train_sents=training_data,
                                 max_rules=self.max_rules,
                                 min_score=self.min_score)
    report("Initial Brill tagger accuracy", brill_tagger, test_data)

    # folding
    # NOTE(review): the initial tagger is not retrained per fold, so a
    # fold's test data may overlap what the n-gram chain was trained on.
    # NOTE(review): the last brill_tagger is neither stored nor returned
    # in the code visible here - confirm that is intended.
    for fold in range(self.num_groups):
        training_data, test_data = split_data()
        # note that .train method returns a BrillTagger() object
        brill_tagger = trainer.train(train_sents=training_data,
                                     max_rules=self.max_rules,
                                     min_score=self.min_score)
        report("Brill tagger accuracy, fold {}".format(fold + 1),
               brill_tagger, test_data)
hmm_fname = "%s_cv-%i_fold-%i_code-%s_stemed-%s.dill" % ( hmm_model_prefix, CV_FOLDS, fold, code, str(STEM)) if os.path.exists(hmm_fname): with open(hmm_fname, "rb") as f: base_tagger = dill.load(f) else: hmm_trainer = HiddenMarkovModelTrainer() base_tagger = hmm_trainer.train_supervised(td) with open(hmm_fname, "wb") as f: dill.dump(base_tagger, f) #See: http://streamhacker.com/2008/12/03/part-of-speech-tagging-with-nltk-part-3/ #and http://streamhacker.com/2014/12/02/nltk-3/ for changes to interface trainer = BrillTaggerTrainer(base_tagger, templates, deterministic=True) model = trainer.train(td, max_rules=MAX_RULES, min_score=MIN_SCORE) code2model[code] = model wd_td_ys_bytag[code] = to_flattened_binary_tags(td) wd_vd_ys_bytag[code] = to_flattened_binary_tags(vd) td_predictions = model.tag_sents(to_sentences(td)) vd_predictions = model.tag_sents(to_sentences(vd)) td_wd_predictions_by_code[code] = to_flattened_binary_tags( td_predictions) vd_wd_predictions_by_code[code] = to_flattened_binary_tags( vd_predictions)
def train_brill_tagger(train_data):
    """Train a Brill tagger over a regexp/unigram/bigram/trigram chain.

    Side effects: prints ``train_data`` and pickles the trigram backoff
    tagger to ``t2.pkl`` in the current directory.

    :param train_data: tagged training sentences; used for every n-gram
        tagger and for Brill rule learning.
    :returns: the tagger returned by ``BrillTaggerTrainer.train``
        (``fntbl37`` templates, at most 10 rules).
    """
    # Imports are kept local to the function, as in the original code.
    import nltk
    from nltk import BigramTagger, TrigramTagger, UnigramTagger
    from nltk.tag.brill_trainer import BrillTaggerTrainer
    from pickle import dump

    templates = nltk.tag.brill.fntbl37()

    # Regular expression (Regex) tagger as the default tagger; the final
    # catch-all pattern tags every remaining token as 'CMN'.
    default_tagger = nltk.RegexpTagger([
        (r'^[Jj]ing', 'ABN'),
        (r'^[pP]yn', 'CAV'),
        (r'^[nN]ga$', '1PSG'),
        (r'^[pP]hi$', '2PG'),
        (r'^[pP]ha$', '2PF'),
        (r'^[mM]e$', '2PM'),
        (r'^[iI]$', '3PSG'),
        (r'^[bB]an$', 'INP'),
        (r'^[Kk]a$', '3PSF'),
        (r'^[uU]$', '3PSM'),
        (r'^[kK]i$', '3PPG'),
        (r'(sha|da|na|hapoh|halor|ha|naduh|shaduh|hapdeng|haduh)$', 'IN'),
        (r'(bad|ruh|namar|hynrei|tangba|katba|katta)$', 'COC'),
        (r'(lada|haba|khnang|ynda)$', 'SUC'),
        (r'(katkum|kat|pat|wat|tang|lang)$', 'AD'),
        (r'(bun|baroh)$', 'QNT'),
        (r'^-?[0-9]+(.[0-9]+)?$', 'CN'),
        (r'(dei|long|don)$', 'CO'),
        (r'^[jJ]ong$', 'POP'),
        (r'^[sS]hah$', 'PAV'),
        (r'^[lL]ah$', 'MOD'),
        (r'^[lL]a$', 'VST'),
        (r'(ym|em|khlem|nym|kam)$', 'NEG'),
        (r'^hi$', 'EM'),
        (r'.*lade$', 'RFP'),
        (r'(dang|nang)$', 'VPP'),
        (r'([uU]n|[kK]an|[kK]in|[sS]a|[yY]n|[nN]gin|[pP]hin)$', 'VFT'),
        (r'(.*ngut|.*tylli)$', 'ADJ'),
        (r'^[bB]a$', 'COM'),
        (r'^\W+$', 'SYM'),
        (r'[^a-z\W]a$', 'IN'),
        (r'([vV]ote|[bB]ye|[cC]onstituency|[sS]outh)$', 'FR'),
        (r'.*', 'CMN'),
    ])

    t0 = default_tagger
    print(train_data)
    # Backoff chain: trigram -> bigram -> unigram -> regexp.
    t1 = UnigramTagger(train_data, backoff=t0)
    t2 = BigramTagger(train_data, backoff=t1)
    t3 = TrigramTagger(train_data, backoff=t2)

    trainer = BrillTaggerTrainer(initial_tagger=t3, templates=templates,
                                 trace=3, deterministic=True)
    brill_tagger = trainer.train(train_data, max_rules=10)

    # Saving the tagger for future use. The context manager closes the file
    # even when pickling fails.
    # NOTE(review): the object written is t3 (the trigram backoff chain),
    # not brill_tagger, and the file is named 't2.pkl' - confirm intent.
    with open('t2.pkl', 'wb') as output:
        dump(t3, output, -1)

    return brill_tagger
brill.Template(brill.Pos([2])), brill.Template(brill.Pos([-2, -1])), brill.Template(brill.Pos([1, 2])), brill.Template(brill.Pos([-3, -2, -1])), brill.Template(brill.Pos([1, 2, 3])), brill.Template(brill.Pos([-1]), brill.Pos([1])), brill.Template(brill.Word([-1])), brill.Template(brill.Word([1])), brill.Template(brill.Word([-2])), brill.Template(brill.Word([2])), brill.Template(brill.Word([-2, -1])), brill.Template(brill.Word([1, 2])), brill.Template(brill.Word([-3, -2, -1])), brill.Template(brill.Word([1, 2, 3])), brill.Template(brill.Word([-1]), brill.Word([1]))] trainer = BrillTaggerTrainer(postag, templates = templates, trace = 3) brill_tagger = trainer.train(traindata, max_rules = 10) # # Source and Destination Extraction From Sentence # # In[12]: def extract_location(inp): tagged = brill_tagger.tag(word_tokenize(inp)) source = None destination = None chunkGram = """Source: {<IN>(<NN.*><,>?)+}""" chunkParser = nltk.RegexpParser(chunkGram) chunked = chunkParser.parse(tagged) for subtree in chunked.subtrees(filter=lambda t: t.label() == 'Source'):