예제 #1
0
def test_issue2833(en_vocab):
    """Pickling a single token or a span of a Doc raises NotImplementedError."""
    doc = Doc(en_vocab, words=["Hello", "world"])
    # First a single-token index, then a slice covering both tokens.
    for index in (0, slice(0, 2)):
        with pytest.raises(NotImplementedError):
            pickle.dumps(doc[index])
예제 #2
0
def test_list_of_docs_pickles_efficiently():
    """A pickled list of 100 docs is under twice the size of one pickled doc.

    Also checks that the list round-trips: same length, and the first and
    last docs keep their text.
    """
    nlp = Language()
    # Look up 10000 distinct string keys in the vocab before pickling anything.
    for number in range(10000):
        _ = nlp.vocab[unicode_(number)]  # noqa: F841
    single_doc_bytes = pickle.dumps(nlp("0"), -1)
    texts = (unicode_(number) for number in range(100))
    doc_list = list(nlp.pipe(texts))
    doc_list_bytes = pickle.dumps(doc_list, -1)
    size_limit = len(single_doc_bytes) * 2
    assert len(doc_list_bytes) < size_limit
    restored_docs = pickle.loads(doc_list_bytes)
    assert restored_docs[0].text == "0"
    assert restored_docs[-1].text == "99"
    assert len(restored_docs) == 100
예제 #3
0
def test_list_of_docs_pickles_efficiently():
    """Check that pickling many docs together costs less than 2x one doc.

    The 100 pickled docs are then loaded back and spot-checked.
    """
    pipeline = Language()
    # Access 10000 string keys on the vocab first; the result is discarded.
    for n in range(10000):
        _ = pipeline.vocab[unicode_(n)]  # noqa: F841
    lone_blob = pickle.dumps(pipeline("0"), -1)
    batch = list(pipeline.pipe(unicode_(n) for n in range(100)))
    batch_blob = pickle.dumps(batch, -1)
    assert len(batch_blob) < (2 * len(lone_blob))
    roundtripped = pickle.loads(batch_blob)
    first_doc, last_doc = roundtripped[0], roundtripped[-1]
    assert first_doc.text == "0"
    assert last_doc.text == "99"
    assert len(roundtripped) == 100
예제 #4
0
def test_user_data_unpickles():
    """An entry stored in ``doc.user_data`` is still there after unpickling."""
    key = (0, 1)
    nlp = Language()
    original = nlp("Hello")
    original.user_data[key] = False
    restored = pickle.loads(pickle.dumps(original))
    # Identity check: the value must come back as the real ``False`` object.
    assert restored.user_data[key] is False
예제 #5
0
def test_user_data_unpickles():
    """Round-trip a Doc through pickle and verify its ``user_data`` entry."""
    pipeline = Language()
    source_doc = pipeline("Hello")
    source_doc.user_data[(0, 1)] = False
    payload = pickle.dumps(source_doc)
    loaded_doc = pickle.loads(payload)
    stored_value = loaded_doc.user_data[(0, 1)]
    assert stored_value is False
예제 #6
0
def test_pickle_single_doc():
    """A single Doc keeps its text and ``_context`` through pickle protocol 1."""
    text = "pickle roundtrip"
    nlp = Language()
    original = nlp(text)
    original._context = 3
    restored = pickle.loads(pickle.dumps(original, 1))
    assert restored.text == "pickle roundtrip"
    assert restored._context == 3
예제 #7
0
def test_issue3248_2():
    """A pickled PhraseMatcher reports the same length after unpickling."""
    nlp = English()
    matcher = PhraseMatcher(nlp.vocab)
    # Each key is registered with a list of Docs built from its texts.
    registrations = (
        ("TEST1", ["a", "b", "c"]),
        ("TEST2", ["d"]),
    )
    for key, texts in registrations:
        matcher.add(key, [nlp(text) for text in texts])
    restored = pickle.loads(pickle.dumps(matcher))
    assert len(restored) == len(matcher)
예제 #8
0
def test_issue3248_2():
    """Test that the PhraseMatcher can be pickled correctly.

    Two keys are registered, the matcher is round-tripped through pickle,
    and the restored matcher must report the same length as the original.
    """
    nlp = English()
    matcher = PhraseMatcher(nlp.vocab)
    # Pass the patterns as one list per key rather than the older
    # ``add(key, None, *docs)`` form with a positional callback slot.
    matcher.add("TEST1", [nlp("a"), nlp("b"), nlp("c")])
    matcher.add("TEST2", [nlp("d")])
    data = pickle.dumps(matcher)
    new_matcher = pickle.loads(data)
    assert len(new_matcher) == len(matcher)
예제 #9
0
def test_hooks_unpickle():
    """A ``similarity`` user hook is still used after the Doc is unpickled."""

    def constant_similarity(d1, d2):
        # Ignores both arguments so the result identifies this hook.
        return "hello!"

    nlp = Language()
    original = nlp("Hello")
    original.user_hooks["similarity"] = constant_similarity
    restored = pickle.loads(pickle.dumps(original))
    assert restored.similarity(None) == "hello!"
예제 #10
0
def test_hooks_unpickle():
    """Pickle a Doc carrying a custom similarity hook and call it afterwards."""

    def fake_similarity(d1, d2):
        return "hello!"

    pipeline = Language()
    source_doc = pipeline("Hello")
    source_doc.user_hooks["similarity"] = fake_similarity
    payload = pickle.dumps(source_doc)
    loaded_doc = pickle.loads(payload)
    # The hook returns a fixed string regardless of what is compared.
    result = loaded_doc.similarity(None)
    assert result == "hello!"
예제 #11
0
def test_kb_pickle():
    """A KnowledgeBase keeps its alias after a pickle round trip."""
    alias = "Russ Cochran"
    nlp = English()
    kb_1 = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    kb_1.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
    # The alias must be absent before it is added and present afterwards.
    assert not kb_1.contains_alias(alias)
    kb_1.add_alias(
        alias=alias,
        entities=["Q2146908"],
        probabilities=[0.8],
    )
    assert kb_1.contains_alias(alias)
    kb_2 = pickle.loads(pickle.dumps(kb_1))
    assert kb_2.contains_alias(alias)
예제 #12
0
def test_nel_pickle():
    """A pipeline containing an entity linker survives a pickle round trip.

    After unpickling, the pipe names and the alias in the linker's KB are
    checked against the original pipeline.
    """
    alias = "Russ Cochran"
    expected_pipes = ["ner", "entity_linker"]

    def build_kb(vocab):
        # One entity with a 3-dimensional vector and one alias pointing at it.
        knowledge_base = KnowledgeBase(vocab, entity_vector_length=3)
        knowledge_base.add_entity(
            entity="Q2146908", freq=12, entity_vector=[6, -4, 3]
        )
        knowledge_base.add_alias(
            alias=alias, entities=["Q2146908"], probabilities=[0.8]
        )
        return knowledge_base

    nlp_1 = English()
    nlp_1.add_pipe("ner")
    entity_linker_1 = nlp_1.add_pipe("entity_linker", last=True)
    entity_linker_1.set_kb(build_kb)
    assert nlp_1.pipe_names == expected_pipes
    assert entity_linker_1.kb.contains_alias(alias)

    nlp_2 = pickle.loads(pickle.dumps(nlp_1))
    assert nlp_2.pipe_names == expected_pipes
    entity_linker_2 = nlp_2.get_pipe("entity_linker")
    assert entity_linker_2.kb.contains_alias(alias)
예제 #13
0
def test_pickle_single_doc():
    """A single Doc keeps its text when pickled with protocol 1 and reloaded."""
    pipeline = Language()
    source_doc = pipeline("pickle roundtrip")
    restored = pickle.loads(pickle.dumps(source_doc, 1))
    assert restored.text == "pickle roundtrip"