Example #1
def benchmark_flair_mdl():
    tagger = load_flair_ner_model()

    start = time.time()

    flair_sentences = []
    for i, sentence in enumerate(sentences_tokens):
        flair_sentence = Sentence()

        for token_txt in sentence:
            flair_sentence.add_token(Token(token_txt))
        flair_sentences.append(flair_sentence)

    tagger.predict(flair_sentences, verbose=True)
    predictions = [[tok.tags['ner'].value for tok in fs]
                   for fs in flair_sentences]
    print('Flair:')
    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens,
        time.time() - start))

    assert len(predictions) == num_sentences

    print(
        classification_report(sentences_entities,
                              remove_miscs(predictions),
                              digits=4))
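The benchmark above relies on names defined elsewhere in the benchmark script: sentences_tokens, sentences_entities, num_sentences, num_tokens and remove_miscs. Below is a minimal sketch of how they could be prepared from the DaNLP DDT test split, following the loading pattern in Example #5; the remove_miscs helper shown here is a hypothetical stand-in that maps MISC tags to 'O' in a list of BIO tag sequences.

from danlp.datasets import DDT

# Load the Danish Dependency Treebank and keep only the predefined test split
ddt = DDT()
_, _, test = ddt.load_as_simple_ner(True)
sentences_tokens, sentences_entities = test

num_sentences = len(sentences_tokens)
num_tokens = sum(len(s) for s in sentences_tokens)

# Hypothetical helper: map B-MISC/I-MISC to 'O' so only PER, LOC and ORG are scored
def remove_miscs(tag_sequences):
    return [['O' if 'MISC' in tag else tag for tag in seq]
            for seq in tag_sequences]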
Example #2
    def test_flair_tagger(self):
        # Download model beforehand
        download_model('flair.ner',
                       DEFAULT_CACHE_DIR,
                       process_func=_unzip_process_func,
                       verbose=True)
        print("Downloaded the flair model")

        # Load the NER tagger using the DaNLP wrapper
        flair_model = load_flair_ner_model()

        # Using the flair NER tagger
        sentence = Sentence(
            'jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
        flair_model.predict(sentence)

        expected_string = "jeg hopper på en bil som er rød sammen med Jens-Peter <B-PER> E. <I-PER> Hansen <I-PER>"

        self.assertEqual(sentence.to_tagged_string(), expected_string)
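Beyond comparing tagged strings, the predicted entities can also be read out as spans. A small sketch, assuming a flair release where Sentence.get_spans('ner') and Span.tag are available; the example sentence is made up.

from danlp.models import load_flair_ner_model
from flair.data import Sentence

flair_model = load_flair_ner_model()

sentence = Sentence('Jens-Peter E. Hansen arbejder i København')
flair_model.predict(sentence)

# get_spans groups consecutive B-/I- tagged tokens into whole entities
for span in sentence.get_spans('ner'):
    print(span.text, span.tag)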
Example #3
def benchmark_flair_mdl():
    tagger = load_flair_ner_model()

    start = time.time()

    flair_sentences = []
    for i, sentence in enumerate(sentences_tokens):
        flair_sentence = Sentence()

        for token_txt in sentence:
            flair_sentence.add_token(Token(token_txt))
        flair_sentences.append(flair_sentence)

    tagger.predict(flair_sentences, verbose=True)
    predictions = [[tok.get_tag('ner').value for tok in fs]
                   for fs in flair_sentences]
    print('Flair:')
    print_speed_performance(start, num_sentences, num_tokens)

    assert len(predictions) == num_sentences

    print(f1_report(sentences_entities, remove_miscs(predictions), bio=True))
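Example #3 is the same benchmark as Example #1 updated for a later revision of the code: per-token labels are read with tok.get_tag('ner') instead of the older tok.tags['ner'] accessor, and the timing and entity-level F1 reporting are delegated to the print_speed_performance and f1_report helpers used elsewhere in the benchmark scripts.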
Example #4
    def setup(self):
        # A horrible hack to fix a check in flair.models.language_model
        # on the pytorch version string that fails on cpu-only builds
        if "+cpu" in torch.__version__:
            torch.__version__ = torch.__version__.replace("+cpu", "")
        self.model = dm.load_flair_ner_model()
Example #5
import time

# models
from danlp.models import load_bert_ner_model, load_flair_ner_model

# dataset
from danlp.datasets import DDT

# utils
from flair.data import Sentence, Token
from copy import deepcopy
from memory_profiler import profile

# load models
bert = load_bert_ner_model()
flair = load_flair_ner_model()

# get data (split into a training set, a validation set, and a test set)
ddt = DDT()
train, valid, test = ddt.load_as_simple_ner(True)

# unpack the test set into observations (sentences) and targets (categories)
sentences, categories = test

@profile
def get_bert_predictions(sentences):
    predictions = []

    # for sentence in sentences:
    #     predictions.append(bert.predict(sentence)[1])
    pred = bert.predict(sentences[0])
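The function above is cut off in the source; it appears to profile a single BERT prediction under memory_profiler. A corresponding flair-side sketch, reusing the flair model loaded above and the Sentence/Token pattern from Examples #1 and #3 (the function name get_flair_predictions is an assumption, not part of the original script):

@profile
def get_flair_predictions(sentences):
    # Wrap each pre-tokenized sentence in a flair Sentence object
    flair_sentences = []
    for tokens in sentences:
        fs = Sentence()
        for token_txt in tokens:
            fs.add_token(Token(token_txt))
        flair_sentences.append(fs)

    # Tag everything in one call, then read the per-token NER labels
    flair.predict(flair_sentences, verbose=True)
    return [[tok.get_tag('ner').value for tok in fs]
            for fs in flair_sentences]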