Beispiel #1
0
def test_serialize_transformer_data():
    """Round-trip transformer data through msgpack and through ``Doc`` bytes.

    First checks that an empty ``TransformerData`` stored in a dict is still
    a ``TransformerData`` after ``srsly.msgpack_dumps`` / ``msgpack_loads``.
    Then runs a pipeline with a "transformer" pipe over a short text,
    serializes the doc with ``to_bytes``, restores it into a fresh ``Doc``
    and compares both the docs and every ``model_output`` entry.
    """
    # Part 1: msgpack round trip of a bare TransformerData instance.
    payload = {"x": TransformerData.empty()}
    restored = srsly.msgpack_loads(srsly.msgpack_dumps(payload))
    assert isinstance(restored["x"], TransformerData)

    # Part 2: Doc-level round trip through a pipeline with a transformer pipe.
    nlp = Language()
    transformer_config = {
        "model": {
            "name": "distilbert-base-uncased",
            "transformer_config": {"output_attentions": True},
        }
    }
    nlp.add_pipe("transformer", config=transformer_config)
    nlp.initialize()

    doc = nlp("This is a test.")
    doc_bytes = doc.to_bytes()
    reloaded_doc = Doc(nlp.vocab)
    reloaded_doc.from_bytes(doc_bytes)
    assert_docs_equal(doc, reloaded_doc)

    # Compare the stored model outputs entry by entry, keyed by the
    # original doc's output keys.
    original_output = doc._.trf_data.model_output
    reloaded_output = reloaded_doc._.trf_data.model_output
    for key in original_output:
        assert_array_equal(original_output[key], reloaded_output[key])
Beispiel #2
0
def check_serialization(
    nlp, text: str = "It is a serialization set. 今日はとてもいい天気だった!"
):
    """Assert that ``nlp`` gives the same doc after a save/load cycle.

    The pipeline is written to a temporary directory with ``to_disk``,
    loaded back with ``spacy.load``, and both pipelines are run on ``text``;
    the two resulting docs are compared with ``assert_docs_equal``.

    Args:
        nlp: Pipeline object to serialize; must support ``to_disk`` and be
            callable on a string.
        text: Text processed by both the original and the reloaded pipeline.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_dir = str(tmp_dir)
        nlp.to_disk(model_dir)
        reloaded_nlp = spacy.load(model_dir)

        # Both docs are produced while the temporary directory still exists.
        original_doc = nlp(text)
        reloaded_doc = reloaded_nlp(text)
        assert_docs_equal(original_doc, reloaded_doc)
def test_multiprocessing(simple_nlp, texts):
    """Check that ``simple_nlp.pipe`` with two processes matches serial calls.

    The comparison only runs when the current ops are ``NumpyOps``; for any
    other ops the function returns without asserting anything.

    Args:
        simple_nlp: Pipeline under test; called directly on each text and
            through ``pipe(..., n_process=2, batch_size=2)``.
        texts: Input texts; repeated three times before processing.
    """
    if not isinstance(get_current_ops(), NumpyOps):
        return

    texts = texts * 3
    expecteds = [simple_nlp(text) for text in texts]

    # Materialize the pipe output so its length can be checked: a bare
    # ``zip`` stops at the shorter input and would silently hide docs that
    # were dropped (or duplicated) by the multi-process pipe.
    docs = list(simple_nlp.pipe(texts, n_process=2, batch_size=2))
    assert len(docs) == len(expecteds)

    for doc, expected_doc in zip(docs, expecteds):
        assert_docs_equal(doc, expected_doc)
Beispiel #4
0
def test_serialization(nlp, tmpdir):
    """Save and reload the pipeline twice, comparing its docs each round.

    The docs produced by the pipeline passed in serve as the reference. In
    each of two rounds the current pipeline is written to a path built from
    ``tmpdir`` plus ``/<round>``, loaded back with ``spacy.load``, and the
    reloaded pipeline's docs for ``TEXTS`` are compared with the reference.
    The pipeline reloaded in one round is the one saved in the next.
    """
    reference_docs = [nlp(text) for text in TEXTS]

    pipeline = nlp
    for round_index in range(2):
        save_path = str(tmpdir + f"/{round_index}")
        pipeline.to_disk(save_path)
        pipeline = spacy.load(save_path)

        reloaded_docs = [pipeline(text) for text in TEXTS]
        for expected, actual in zip(reference_docs, reloaded_docs):
            assert_docs_equal(expected, actual)
Beispiel #5
0
def test_serialization(nlp, tmpdir):
    """Check that a pipeline reloaded from disk reproduces the same docs.

    The pipeline is run over ``TEXTS``, written to ``tmpdir``, loaded back
    with ``spacy.load`` and run over ``TEXTS`` again. The doc pairs are then
    compared with ``assert_docs_equal``, except on spaCy 2.2.4 where the
    comparison is skipped entirely.
    """
    original_docs = [nlp(text) for text in TEXTS]

    model_dir = str(tmpdir)
    nlp.to_disk(model_dir)
    reloaded_nlp = spacy.load(model_dir)
    reloaded_docs = [reloaded_nlp(text) for text in TEXTS]

    if spacy.__version__ == "2.2.4":
        # this version of spacy has a bug in `assert_docs_equal`.
        # see https://github.com/explosion/spaCy/issues/5144
        return

    for expected, actual in zip(original_docs, reloaded_docs):
        assert_docs_equal(expected, actual)