import spacy
from spacy.pipeline import EntityRecognizer
from spacy.tokens import Doc


def runspacymodel(sentences, tagger, model):
    # model is the name of an installed spaCy model, e.g. 'en_core_web_sm'
    nlp = spacy.load(model)
    # load a serialized EntityRecognizer from the path given by `tagger`
    ner = EntityRecognizer(nlp.vocab)
    ner.from_disk(tagger)

    result = []
    for sentence in sentences:
        # each sentence is expected to be a pre-tokenized list of words
        doc = Doc(nlp.vocab, words=sentence)

        # run the entity recognizer over the sentence
        processed = ner(doc)
        for token in processed:
            result.append([token.text, token.ent_type_])
    return result
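
A minimal usage sketch, assuming the sentences are already tokenized and that 'my_ner' is a placeholder path to an EntityRecognizer previously saved with to_disk():

sentences = [["Apple", "is", "looking", "at", "buying", "a", "startup", "."]]
# 'my_ner' is a hypothetical directory created earlier via ner.to_disk('my_ner')
tagged = runspacymodel(sentences, 'my_ner', 'en_core_web_sm')
# each entry pairs a token with its entity type ('' when the token is not part of an entity)
print(tagged)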
Example #2
from spacy.lang.en import English
from spacy.pipeline import EntityRecognizer
from spacy.tokens import Span
from spacy.util import ensure_path
from spacy.tests.util import make_tempdir  # helper from spaCy's own test suite


def test_issue4042_bug2():
    """
    Test that serialization of an NER works fine when new labels were added.
    This is the second of two bugs underlying issue 4042.
    """
    nlp1 = English()
    vocab = nlp1.vocab

    # add ner pipe
    ner1 = nlp1.create_pipe("ner")
    ner1.add_label("SOME_LABEL")
    nlp1.add_pipe(ner1)
    nlp1.begin_training()

    # add a new label to the doc
    doc1 = nlp1("What do you think about Apple ?")
    assert len(ner1.labels) == 1
    assert "SOME_LABEL" in ner1.labels
    apple_ent = Span(doc1, 5, 6, label="MY_ORG")
    doc1.ents = list(doc1.ents) + [apple_ent]

    # reapply the NER - at this point it should resize itself
    ner1(doc1)
    assert len(ner1.labels) == 2
    assert "SOME_LABEL" in ner1.labels
    assert "MY_ORG" in ner1.labels

    with make_tempdir() as d:
        # assert IO goes fine
        output_dir = ensure_path(d)
        if not output_dir.exists():
            output_dir.mkdir()
        ner1.to_disk(output_dir)

        nlp2 = English(vocab)
        ner2 = EntityRecognizer(vocab)
        ner2.from_disk(output_dir)
        assert len(ner2.labels) == 2
Example #3
import spacy
from spacy.pipeline import EntityRecognizer

# load the English model with its default NER component disabled
nlp = spacy.load('en', disable=['ner'])

# load a previously saved EntityRecognizer and add it to the pipeline under a custom name
ner = EntityRecognizer(nlp.vocab)
ner.from_disk('/usr/to/ner')
nlp.add_pipe(ner, "custom_ner")
print(nlp.meta['pipeline'])

doc = nlp(u'Could you pick me up at Solnce?')
for ent in doc.ents:
    print(ent.text, ent.label_)
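
For completeness, a minimal sketch of how the EntityRecognizer could have been saved so that it can later be loaded as above; the path is a placeholder:

import spacy

nlp = spacy.load('en')
# the trained "ner" pipeline component can be serialized on its own
nlp.get_pipe('ner').to_disk('/usr/to/ner')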