def test_tagger(tagger_name, tagger_input, test_data, **kwargs):
    """Build a tagger, score it on ``test_data`` and report the metrics.

    Args:
        tagger_name: Callable (despite the name, not a string) invoked as
            ``tagger_name(tagger_input, **kwargs)``; the object it returns
            must provide an ``evaluate`` method.
        tagger_input: First positional argument handed to ``tagger_name``.
        test_data: Passed unchanged to the tagger's ``evaluate`` method.
        **kwargs: Forwarded unchanged to ``tagger_name``.

    Returns:
        dict: The collected metrics under the keys ``'train_time'``,
        ``'test_accuracy'`` and ``'test_time'``. The same dict is handed
        to ``display_training_metrics`` before returning, so callers that
        ignore the return value see the same output as before.
    """
    # initialise results
    tagger_eval = {}

    # train: constructing the tagger is the step being timed
    # NOTE(review): presumably toc() returns the time elapsed since the
    # matching tic() - confirm against the timer helpers.
    tic()
    tagger_tagger = tagger_name(tagger_input, **kwargs)
    tagger_eval['train_time'] = toc()

    # test
    tic()
    tagger_eval['test_accuracy'] = tagger_tagger.evaluate(test_data)
    tagger_eval['test_time'] = toc()

    # show results
    display_training_metrics(tagger_eval)

    # Hand the metrics back so different taggers can be compared by the
    # caller instead of only being displayed.
    return tagger_eval
""" 0. set up the corpora for training and testing of tagging methods """ # load the corpus as tagged sentences train_sents, val_sents, test_sents = read_corpus('INTERA', role='train', proportion=PROPORTION, tag_length=TAG_LENGTH) """ # ============================================================================= # investigate NLTK classification tagging options # ============================================================================= """ """ 1. TNT tagger """ tnt_eval = dict() # train tic() tnt_tagger = tnt.TnT() tnt_tagger.train(train_sents) tnt_eval['train_time'] = toc() # test tic() tnt_eval['test_accuracy'] = tnt_tagger.evaluate(val_sents) tnt_eval['test_time'] = toc() # display results display_training_metrics(tnt_eval) """ 2. Naive Bayes classifier tagger """ nb_eval = dict() # train tic() nb_tagger = ClassifierBasedPOSTagger(train=train_sents) nb_eval['train_time'] = toc()