def test_ents_reset(en_vocab): """Ensure that resetting doc.ents does not change anything""" text = ["This", "is", "a", "lion"] doc = Doc(en_vocab, words=text) cfg = {"model": DEFAULT_NER_MODEL} model = registry.resolve(cfg, validate=True)["model"] ner = EntityRecognizer(en_vocab, model) ner.initialize(lambda: [_ner_example(ner)]) ner(doc) orig_iobs = [t.ent_iob_ for t in doc] doc.ents = list(doc.ents) assert [t.ent_iob_ for t in doc] == orig_iobs
def test_doc_add_entities_set_ents_iob(en_vocab): text = ["This", "is", "a", "lion"] doc = Doc(en_vocab, words=text) cfg = {"model": DEFAULT_NER_MODEL} model = registry.resolve(cfg, validate=True)["model"] ner = EntityRecognizer(en_vocab, model) ner.initialize(lambda: [_ner_example(ner)]) ner(doc) doc.ents = [("ANIMAL", 3, 4)] assert [w.ent_iob_ for w in doc] == ["O", "O", "O", "B"] doc.ents = [("WORD", 0, 2)] assert [w.ent_iob_ for w in doc] == ["B", "I", "O", "O"]
def test_add_label_deserializes_correctly(): cfg = {"model": DEFAULT_NER_MODEL} model = registry.resolve(cfg, validate=True)["model"] ner1 = EntityRecognizer(Vocab(), model) ner1.add_label("C") ner1.add_label("B") ner1.add_label("A") ner1.initialize(lambda: [_ner_example(ner1)]) ner2 = EntityRecognizer(Vocab(), model) # the second model needs to be resized before we can call from_bytes ner2.model.attrs["resize_output"](ner2.model, ner1.moves.n_moves) ner2.from_bytes(ner1.to_bytes()) assert ner1.moves.n_moves == ner2.moves.n_moves for i in range(ner1.moves.n_moves): assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)
def test_ents_reset(en_vocab): """Ensure that resetting doc.ents does not change anything""" text = ["This", "is", "a", "lion"] doc = Doc(en_vocab, words=text) config = { "learn_tokens": False, "min_action_freq": 30, "update_with_oracle_cut_size": 100, } cfg = {"model": DEFAULT_NER_MODEL} model = registry.resolve(cfg, validate=True)["model"] ner = EntityRecognizer(en_vocab, model, **config) ner.initialize(lambda: [_ner_example(ner)]) ner(doc) orig_iobs = [t.ent_iob_ for t in doc] doc.ents = list(doc.ents) assert [t.ent_iob_ for t in doc] == orig_iobs
def test_doc_add_entities_set_ents_iob(en_vocab): text = ["This", "is", "a", "lion"] doc = Doc(en_vocab, words=text) config = { "learn_tokens": False, "min_action_freq": 30, "update_with_oracle_cut_size": 100, } cfg = {"model": DEFAULT_NER_MODEL} model = registry.resolve(cfg, validate=True)["model"] ner = EntityRecognizer(en_vocab, model, **config) ner.initialize(lambda: [_ner_example(ner)]) ner(doc) doc.ents = [("ANIMAL", 3, 4)] assert [w.ent_iob_ for w in doc] == ["O", "O", "O", "B"] doc.ents = [("WORD", 0, 2)] assert [w.ent_iob_ for w in doc] == ["B", "I", "O", "O"]
def test_add_label_deserializes_correctly(): config = { "learn_tokens": False, "min_action_freq": 30, "update_with_oracle_cut_size": 100, } cfg = {"model": DEFAULT_NER_MODEL} model = registry.resolve(cfg, validate=True)["model"] ner1 = EntityRecognizer(Vocab(), model, **config) ner1.add_label("C") ner1.add_label("B") ner1.add_label("A") ner1.initialize(lambda: [_ner_example(ner1)]) ner2 = EntityRecognizer(Vocab(), model, **config) # the second model needs to be resized before we can call from_bytes ner2.model.attrs["resize_output"](ner2.model, ner1.moves.n_moves) ner2.from_bytes(ner1.to_bytes()) assert ner1.moves.n_moves == ner2.moves.n_moves for i in range(ner1.moves.n_moves): assert ner1.moves.get_class_name(i) == ner2.moves.get_class_name(i)