def benchmark_stanza_mdl():
    """Benchmark the Stanza Danish dependency parser.

    Parses every sentence of the module-level ``sentences_tokens`` with a
    pretokenized Stanza pipeline, prints the speed figures and then the
    dependency report comparing the predictions with ``deps_true``.

    Raises:
        AssertionError: if the number of predicted sentences or tokens
            differs from ``num_sentences`` / ``num_tokens``.
    """
    nlp = stanza.Pipeline(
        'da',
        processors='tokenize,pos,lemma,depparse',
        tokenize_pretokenized=True,
    )

    start = time.time()

    deps_pred = []
    for sent in sentences_tokens:
        # Hand the gold tokens over as one pretokenized sentence (a list of
        # token lists). Joining them into a string would let the pipeline
        # re-split on whitespace, breaking the alignment with the gold
        # annotation whenever a token itself contains whitespace.
        doc = nlp([list(sent)])

        # One (relation, head index) pair per token; the first word of each
        # token is used, as the pipeline has no multi-word-token expansion.
        deps_pred.append([
            (tok.words[0].deprel, tok.words[0].head)
            for tok in doc.iter_tokens()
        ])

    print('**Stanza model**')
    print_speed_performance(start, num_sentences, num_tokens)

    # Sanity checks: predictions must line up with the gold data.
    assert len(deps_pred) == num_sentences
    assert sum(len(s) for s in deps_pred) == num_tokens

    print(dependency_report(deps_true, deps_pred))
def benchmark_spacy_mdl():
    """Benchmark the spaCy dependency parser.

    Builds a ``Doc`` from the gold tokens of every sentence in the
    module-level ``sentences_tokens``, runs only the parser component on it,
    prints the speed figures and then the dependency report comparing the
    predictions with ``deps_true``.

    Raises:
        AssertionError: if the number of predicted sentences or tokens
            differs from ``num_sentences`` / ``num_tokens``.
    """
    # Imported locally so the rest of the module does not require spaCy.
    from spacy.tokens import Doc

    def normalize_spacy_head(i, hd):
        # spaCy marks the root by making a token its own head and uses
        # 0-based indices; map that to 1-based heads with 0 for the root.
        return 0 if i == hd else hd + 1

    nlp = load_spacy_model()
    parser = nlp.get_pipe('parser')

    start = time.time()

    deps_pred = []
    for sent in sentences_tokens:
        # Build the doc straight from the gold tokens. This replaces the
        # deprecated ``tokenizer.tokens_from_list`` (removed in spaCy 3).
        doc = parser(Doc(nlp.vocab, words=sent))

        deps_pred.append([
            (tok.dep_.lower(), normalize_spacy_head(i, tok.head.i))
            for i, tok in enumerate(doc)
        ])

    print('**Spacy model**')
    print_speed_performance(start, num_sentences, num_tokens)

    # Sanity checks: predictions must line up with the gold data.
    assert len(deps_pred) == num_sentences
    assert sum(len(s) for s in deps_pred) == num_tokens

    print(dependency_report(deps_true, deps_pred))
def benchmark_dacy_mdl(dacy_model="da_dacy_large_tft-0.0.0"):
    """Benchmark a DaCy dependency parser.

    An adaptation of the spaCy benchmark that is compatible with spaCy v3.
    Builds a ``Doc`` from the gold tokens of every sentence in the
    module-level ``sentences_tokens``, runs the transformer and parser
    components on it, prints the speed figures and then the dependency
    report comparing the predictions with ``deps_true``.

    Running this requires:
        spacy >= 3.0.0
        spacy-transformers

    Args:
        dacy_model: Name of the DaCy model passed to ``dacy.load``.

    Raises:
        AssertionError: if the number of predicted sentences or tokens
            differs from ``num_sentences`` / ``num_tokens``.
    """
    # Imported locally so the rest of the module works without these
    # optional dependencies installed.
    from spacy.tokens import Doc
    import dacy

    def normalize_spacy_head(i, hd):
        # spaCy marks the root by making a token its own head and uses
        # 0-based indices; map that to 1-based heads with 0 for the root.
        return 0 if i == hd else hd + 1

    nlp = dacy.load(dacy_model)
    trf = nlp.get_pipe('transformer')
    parser = nlp.get_pipe('parser')

    start = time.time()

    deps_pred = []
    for sent in sentences_tokens:
        # Build the doc from the gold tokens, then run the transformer
        # followed by the parser on it.
        doc = Doc(nlp.vocab, words=sent)
        doc = trf(doc)
        doc = parser(doc)

        deps_pred.append([
            (tok.dep_.lower(), normalize_spacy_head(i, tok.head.i))
            for i, tok in enumerate(doc)
        ])

    # Label the output as DaCy so it is not confused with the results of
    # the plain spaCy benchmark.
    print('**DaCy model**')
    print_speed_performance(start, num_sentences, num_tokens)

    # Sanity checks: predictions must line up with the gold data.
    assert len(deps_pred) == num_sentences
    assert sum(len(s) for s in deps_pred) == num_tokens

    print(dependency_report(deps_true, deps_pred))