def benchmark_flair_mdl():
    """Benchmark the Flair NER tagger on the pre-tokenized evaluation data.

    Reads the module-level ``sentences_tokens``, ``sentences_entities``,
    ``num_sentences`` and ``num_tokens``. Prints the elapsed wall-clock time
    and a classification report; returns nothing.
    """
    tagger = load_flair_ner_model()

    # The clock starts after the model is loaded, so the reported time covers
    # building the Sentence objects, prediction and tag extraction only.
    start = time.time()

    # Build each Sentence token by token from the pre-tokenized input.
    flair_sentences = []
    for token_texts in sentences_tokens:
        flair_sentence = Sentence()
        for text in token_texts:
            flair_sentence.add_token(Token(text))
        flair_sentences.append(flair_sentence)

    # predict() is called for its effect on the Sentence objects; the tags
    # are read back from the tokens below.
    tagger.predict(flair_sentences, verbose=True)

    predictions = []
    for flair_sentence in flair_sentences:
        predictions.append(
            [token.tags['ner'].value for token in flair_sentence])

    print('Flair:')
    elapsed = time.time() - start
    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens, elapsed))

    # Sanity check: one prediction list per input sentence.
    assert len(predictions) == num_sentences

    report = classification_report(
        sentences_entities, remove_miscs(predictions), digits=4)
    print(report)
def test_flair_tagger(self):
    """Check the tagged string the Flair NER model produces for a Danish sentence."""
    # Download the model beforehand.
    download_model(
        'flair.ner',
        DEFAULT_CACHE_DIR,
        process_func=_unzip_process_func,
        verbose=True,
    )
    print("Downloaded the flair model")

    # Load the NER tagger using the DaNLP wrapper.
    tagger = load_flair_ner_model()

    # Tag a sentence that ends in a multi-token person name.
    tagged = Sentence(
        'jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
    tagger.predict(tagged)

    # Only the three tokens of the person name are expected to carry a tag.
    expected = "jeg hopper på en bil som er rød sammen med Jens-Peter <B-PER> E. <I-PER> Hansen <I-PER>"
    actual = tagged.to_tagged_string()
    self.assertEqual(actual, expected)
def benchmark_flair_mdl():
    """Benchmark the Flair NER tagger on the pre-tokenized evaluation data.

    Reads the module-level ``sentences_tokens``, ``sentences_entities``,
    ``num_sentences`` and ``num_tokens``. Prints the speed summary produced
    by ``print_speed_performance`` and the result of ``f1_report``; returns
    nothing.
    """
    tagger = load_flair_ner_model()

    # The clock starts after the model is loaded, so model loading is not
    # part of the measured interval.
    start = time.time()

    # Build each Sentence token by token from the pre-tokenized input.
    flair_sentences = []
    for token_texts in sentences_tokens:
        current = Sentence()
        for text in token_texts:
            current.add_token(Token(text))
        flair_sentences.append(current)

    # predict() is called for its effect on the Sentence objects; the tags
    # are read back from the tokens below.
    tagger.predict(flair_sentences, verbose=True)

    predictions = []
    for current in flair_sentences:
        predictions.append([token.get_tag('ner').value for token in current])

    print('Flair:')
    print_speed_performance(start, num_sentences, num_tokens)

    # Sanity check: one prediction list per input sentence.
    assert len(predictions) == num_sentences

    report = f1_report(sentences_entities, remove_miscs(predictions), bio=True)
    print(report)
def setup(self):
    """Load the Flair NER model into ``self.model``."""
    # HACK: a version check in flair.models.language_model fails on CPU
    # builds of PyTorch, whose version string carries a "+cpu" suffix.
    # Strip the suffix before loading; leave the attribute untouched when
    # the suffix is absent.
    version = torch.__version__
    if "+cpu" in version:
        torch.__version__ = version.replace("+cpu", "")

    self.model = dm.load_flair_ner_model()
import time # models from danlp.models import load_bert_ner_model, load_flair_ner_model # dataset from danlp.datasets import DDT # utils from flair.data import Sentence, Token from copy import deepcopy from memory_profiler import profile # load models bert = load_bert_ner_model() flair = load_flair_ner_model() # get data (splitted into a training set, a validation set, and a test set) ddt = DDT() train, valid, test = ddt.load_as_simple_ner(True) # divide the observations and the targets of the testset into new variables sentences, categories = test @profile def get_bert_predictions(sentences): predictions = [] '''for sentence in sentences: predictions.append(bert.predict(sentence)[1])''' pred = bert.predict(sentences[0])