def test_issue2833(en_vocab):
    """Pickling a Token or a Span must raise a custom NotImplementedError."""
    doc = Doc(en_vocab, words=["Hello", "world"])
    # Both a single token and a multi-token span refuse to pickle on their own.
    for piece in (doc[0], doc[0:2]):
        with pytest.raises(NotImplementedError):
            pickle.dumps(piece)
def test_list_of_docs_pickles_efficiently():
    """Pickling many Docs together should share vocab state, not repeat it."""
    nlp = Language()
    # Pre-populate the vocab so any naive per-doc serialization of it would
    # dominate the payload size.
    for idx in range(10000):
        _ = nlp.vocab[unicode_(idx)]  # noqa: F841
    single = pickle.dumps(nlp("0"), -1)
    docs = list(nlp.pipe(unicode_(idx) for idx in range(100)))
    batch = pickle.dumps(docs, -1)
    # 100 docs must pickle smaller than twice one doc — shared state wins.
    assert len(batch) < 2 * len(single)
    restored = pickle.loads(batch)
    assert len(restored) == 100
    assert restored[0].text == "0"
    assert restored[-1].text == "99"
def test_user_data_unpickles():
    """Entries in Doc.user_data must survive a pickle round trip."""
    nlp = Language()
    original = nlp("Hello")
    key = (0, 1)
    original.user_data[key] = False
    restored = pickle.loads(pickle.dumps(original))
    # `is False` also guards against the value being coerced on the way back.
    assert restored.user_data[key] is False
def test_pickle_single_doc():
    """A Doc, including its ``_context`` attribute, survives pickling."""
    nlp = Language()
    doc = nlp("pickle roundtrip")
    doc._context = 3
    # Protocol 1 is used deliberately to exercise an older pickle format.
    restored = pickle.loads(pickle.dumps(doc, 1))
    assert restored.text == "pickle roundtrip"
    assert restored._context == 3
def test_issue3248_2():
    """Test that the PhraseMatcher can be pickled correctly."""
    nlp = English()
    matcher = PhraseMatcher(nlp.vocab)
    matcher.add("TEST1", [nlp("a"), nlp("b"), nlp("c")])
    matcher.add("TEST2", [nlp("d")])
    restored = pickle.loads(pickle.dumps(matcher))
    # The unpickled matcher keeps all registered keys.
    assert len(restored) == len(matcher)
def test_issue3248_2_legacy():
    """Test that the PhraseMatcher can be pickled correctly.

    NOTE(review): renamed from ``test_issue3248_2`` — the duplicate name
    shadowed the earlier definition, so only one of the two tests was ever
    collected by pytest. Also updated from the removed pre-v3 signature
    ``matcher.add(key, on_match, *docs)`` to the current list-of-patterns
    form ``matcher.add(key, patterns)``.
    """
    nlp = English()
    matcher = PhraseMatcher(nlp.vocab)
    matcher.add("TEST1", [nlp("a"), nlp("b"), nlp("c")])
    matcher.add("TEST2", [nlp("d")])
    data = pickle.dumps(matcher)
    new_matcher = pickle.loads(data)
    assert len(new_matcher) == len(matcher)
def test_hooks_unpickle():
    """User hooks installed on a Doc are restored after unpickling."""

    def constant_similarity(first, second):
        # Sentinel return value proves the hook (not the default) ran.
        return "hello!"

    nlp = Language()
    doc = nlp("Hello")
    doc.user_hooks["similarity"] = constant_similarity
    restored = pickle.loads(pickle.dumps(doc))
    assert restored.similarity(None) == "hello!"
def test_kb_pickle():
    """A populated KnowledgeBase survives a pickle round trip."""
    nlp = English()
    kb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    kb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
    # The alias must not exist before it is added...
    assert not kb.contains_alias("Russ Cochran")
    kb.add_alias(alias="Russ Cochran", entities=["Q2146908"], probabilities=[0.8])
    # ...and must exist afterwards.
    assert kb.contains_alias("Russ Cochran")
    restored = pickle.loads(pickle.dumps(kb))
    assert restored.contains_alias("Russ Cochran")
def test_nel_pickle():
    """A pipeline containing an entity_linker component must be picklable."""

    def create_kb(vocab):
        # Minimal KB: one entity with a single alias pointing at it.
        kb = KnowledgeBase(vocab, entity_vector_length=3)
        kb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
        kb.add_alias(alias="Russ Cochran", entities=["Q2146908"], probabilities=[0.8])
        return kb

    source = English()
    source.add_pipe("ner")
    linker = source.add_pipe("entity_linker", last=True)
    linker.set_kb(create_kb)
    assert source.pipe_names == ["ner", "entity_linker"]
    assert linker.kb.contains_alias("Russ Cochran")

    clone = pickle.loads(pickle.dumps(source))
    assert clone.pipe_names == ["ner", "entity_linker"]
    restored_linker = clone.get_pipe("entity_linker")
    assert restored_linker.kb.contains_alias("Russ Cochran")
def test_pickle_single_doc_roundtrip():
    """A plain Doc survives a pickle round trip.

    NOTE(review): renamed from ``test_pickle_single_doc`` — the duplicate
    name shadowed the earlier, more thorough definition (which also checks
    ``_context``), so only one of the two tests was ever collected by pytest.
    """
    nlp = Language()
    doc = nlp("pickle roundtrip")
    # Protocol 1 is used deliberately to exercise an older pickle format.
    data = pickle.dumps(doc, 1)
    doc2 = pickle.loads(data)
    assert doc2.text == "pickle roundtrip"