def runspacymodel(sentences, tagger, model):
    """Tag pre-tokenized sentences with an entity recognizer loaded from disk.

    Args:
        sentences: Iterable of sentences; each one is passed as the ``words``
            argument of a ``Doc``, so it must already be split into tokens.
        tagger: Location handed to ``EntityRecognizer.from_disk``.
        model: Name or path given to ``spacy.load`` (e.g. 'en_core_web_sm').
            Only the vocab of the loaded pipeline is used here.

    Returns:
        A flat list of ``[token.text, token.ent_type_]`` pairs covering every
        token of every sentence, in input order.
    """
    vocab = spacy.load(model).vocab
    recognizer = EntityRecognizer(vocab)
    recognizer.from_disk(tagger)

    tagged = []
    for words in sentences:
        # Build the Doc straight from the given words, then run only the
        # entity recognizer over it.
        annotated = recognizer(spacy.tokens.doc.Doc(vocab, words=words))
        tagged.extend([tok.text, tok.ent_type_] for tok in annotated)
    return tagged
def test_issue4042_bug2():
    """
    Check that an NER component serializes correctly after new labels were
    added to it. This covers the second of the two bugs behind issue 4042.
    """
    pipeline = English()
    shared_vocab = pipeline.vocab

    # Register an NER component that knows a single label, then initialize.
    source_ner = pipeline.create_pipe("ner")
    source_ner.add_label("SOME_LABEL")
    pipeline.add_pipe(source_ner)
    pipeline.begin_training()

    # Before any new entity is attached, only the explicit label is present.
    doc = pipeline("What do you think about Apple ?")
    assert len(source_ner.labels) == 1
    assert "SOME_LABEL" in source_ner.labels

    # Attach an entity carrying a label the component has not been given.
    new_entity = Span(doc, 5, 6, label="MY_ORG")
    doc.ents = list(doc.ents) + [new_entity]

    # Re-running the component on the doc should make it resize itself
    # to include the new label.
    source_ner(doc)
    assert len(source_ner.labels) == 2
    assert "SOME_LABEL" in source_ner.labels
    assert "MY_ORG" in source_ner.labels

    with make_tempdir() as tmp:
        # Round-trip through disk: write the resized component out ...
        output_dir = ensure_path(tmp)
        if not output_dir.exists():
            output_dir.mkdir()
        source_ner.to_disk(output_dir)

        # ... and read it back into a fresh recognizer on the same vocab.
        fresh_pipeline = English(shared_vocab)
        restored_ner = EntityRecognizer(shared_vocab)
        restored_ner.from_disk(output_dir)
        assert len(restored_ner.labels) == 2
import spacy
from spacy.pipeline import EntityRecognizer

# Load the 'en' pipeline with its own NER component disabled.
nlp = spacy.load("en", disable=["ner"])

# Create a recognizer on the pipeline's vocab, load its data from disk and
# add it to the pipeline under the name "custom_ner".
ner = EntityRecognizer(nlp.vocab)
ner.from_disk("/usr/to/ner")
nlp.add_pipe(ner, "custom_ner")

# Show the pipeline component list recorded in the meta data.
print(nlp.meta["pipeline"])

# Run the full pipeline on a sample sentence and list the entities found.
doc = nlp(u"Could you pick me up at Solnce?")
for ent in doc.ents:
    print(ent.text, ent.label_)