def test_language_wordpiece_tok2vec_to_from_bytes(nlp, name):
    """Round-trip a word-piecer + token-vector-encoder pipeline through bytes.

    The supplied ``nlp`` pipeline must yield a valid tensor. A second,
    freshly assembled pipeline is expected to raise ``ValueError`` when
    called before loading, and to yield a valid tensor and word pieces once
    it has been restored from ``nlp.to_bytes()``.

    ``name`` is accepted but not referenced in the body.
    """
    text = "hello world"

    source_doc = nlp(text)
    assert is_valid_tensor(source_doc.tensor)

    # Assemble an empty counterpart pipeline; its components reuse the
    # vocab of the source pipeline.
    restored_nlp = PyTT_Language()
    sentencizer = restored_nlp.create_pipe("sentencizer")
    restored_nlp.add_pipe(sentencizer)
    wordpiecer = PyTT_WordPiecer(nlp.vocab)
    restored_nlp.add_pipe(wordpiecer)
    tok2vec = PyTT_TokenVectorEncoder(nlp.vocab)
    restored_nlp.add_pipe(tok2vec)

    # Calling the pipeline before from_bytes() is expected to fail
    # (presumably because its components hold no loaded model yet).
    with pytest.raises(ValueError):
        restored_doc = restored_nlp(text)

    serialized = nlp.to_bytes()
    restored_nlp.from_bytes(serialized)

    restored_doc = restored_nlp(text)
    assert is_valid_tensor(restored_doc.tensor)
    assert restored_doc._.pytt_word_pieces is not None
def test_language_wordpiece_to_from_bytes(name):
    """Round-trip a sentencizer + word-piecer pipeline through bytes.

    A pipeline whose word-piecer is loaded with ``from_pretrained`` (using
    ``pytt_name=name``) must produce word pieces. A second pipeline with a
    bare ``PyTT_WordPiecer`` is expected to raise ``ValueError`` when called
    before loading, and to produce word pieces once it has been restored
    from the first pipeline's ``to_bytes()`` output.
    """
    nlp = PyTT_Language()
    nlp.add_pipe(nlp.create_pipe("sentencizer"))
    wordpiecer = PyTT_WordPiecer.from_pretrained(nlp.vocab, pytt_name=name)
    nlp.add_pipe(wordpiecer)
    doc = nlp("hello world")
    assert doc._.pytt_word_pieces is not None

    nlp2 = PyTT_Language()
    # Build every component from nlp2 itself so the second pipeline does not
    # depend on the first one except through the serialized bytes.
    nlp2.add_pipe(nlp2.create_pipe("sentencizer"))
    nlp2.add_pipe(PyTT_WordPiecer(nlp2.vocab))

    # Calling the pipeline before from_bytes() is expected to fail
    # (presumably because the bare word-piecer has no loaded model yet).
    with pytest.raises(ValueError):
        nlp2("hello world")

    nlp2.from_bytes(nlp.to_bytes())
    new_doc = nlp2("hello world")
    assert new_doc._.pytt_word_pieces is not None