Esempio n. 1
0
def test_language_wordpiece_tok2vec_to_from_bytes(nlp, name):
    doc = nlp("hello world")
    assert is_valid_tensor(doc.tensor)
    nlp2 = PyTT_Language()
    nlp2.add_pipe(nlp2.create_pipe("sentencizer"))
    nlp2.add_pipe(PyTT_WordPiecer(nlp.vocab))
    nlp2.add_pipe(PyTT_TokenVectorEncoder(nlp.vocab))
    with pytest.raises(ValueError):
        new_doc = nlp2("hello world")
    nlp2.from_bytes(nlp.to_bytes())
    new_doc = nlp2("hello world")
    assert is_valid_tensor(new_doc.tensor)
    assert new_doc._.pytt_word_pieces is not None
Esempio n. 2
0
def test_language_wordpiece_to_from_bytes(name):
    nlp = PyTT_Language()
    nlp.add_pipe(nlp.create_pipe("sentencizer"))
    wordpiecer = PyTT_WordPiecer.from_pretrained(nlp.vocab, pytt_name=name)
    nlp.add_pipe(wordpiecer)
    doc = nlp("hello world")
    assert doc._.pytt_word_pieces is not None
    nlp2 = PyTT_Language()
    nlp2.add_pipe(nlp.create_pipe("sentencizer"))
    nlp2.add_pipe(PyTT_WordPiecer(nlp2.vocab))
    with pytest.raises(ValueError):
        new_doc = nlp2("hello world")
    nlp2.from_bytes(nlp.to_bytes())
    new_doc = nlp2("hello world")
    assert new_doc._.pytt_word_pieces is not None